In [None]:
import sys
if "google.colab" in sys.modules:
    print("Colab Detected")
    import tensorflow as tf
    gpus = tf.config.list_physical_devices("GPU")
    if not gpus:
        raise RuntimeError("Nessuna GPU trovata.Controlla di aver selezionato il runtime giusto.")
    else:
        print(f"Trovate {len(gpus)} GPU:\n{gpus}")

    !git clone https://github.com/AtomicDonuts/Progetto_Computings.git
    %cd Progetto_Computings/
    !pip install -q -r requirements.txt
    !python3 fits_import/fits2csv.py

    sys.path.append("imports/")
    import custom_variables as custom_paths
else:
    print("Local Machine Detected")
    sys.path.append("../imports/")
    import custom_variables as custom_paths
    import nn_models as ann

In [None]:
from keras.layers import Dense, Input, Concatenate,Flatten, Dropout
from keras.models import Model,Sequential
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
# Load the catalogue and keep only the two classes used for the binary task.
df = pd.read_csv(custom_paths.csv_path)
# .copy() makes the filtered frame independent of the original, so the column
# assignments below are not chained assignments on a slice.
df = df[df["CLASS_GENERIC"].isin(["AGN", "Pulsar"])].copy()
print(f"Dataset Dimentions: {len(df)}")

# One-hot encode the spectrum type into three 0/1 indicator columns.
df["PowerLaw"] = np.where(df["SpectrumType"] == "PowerLaw", 1, 0)
df["LogParabola"] = np.where(df["SpectrumType"] == "LogParabola", 1, 0)
df["PLSuperExpCutoff"] = np.where(df["SpectrumType"] == "PLSuperExpCutoff", 1, 0)

# Feature matrix fed to the network (one row per source, columns in this order).
norm_cols = ["GLAT", "PowerLaw", "LogParabola", "PLSuperExpCutoff", "Variability_Index"]
input_datas = df[norm_cols].to_numpy()

# Binary target: 0 = AGN, 1 = Pulsar.
class_names = df["CLASS_GENERIC"].to_numpy()
is_agn = class_names == "AGN"
is_psr = class_names == "Pulsar"
# Every remaining row is either AGN or Pulsar, so the Pulsar mask is the label vector.
labels = is_psr.astype(int)

In [None]:
# Stratified 10-fold cross-validation.
# For every fold a fresh model is built and trained on the training split, then
# scored on the held-out split; per-fold loss and accuracy (in percent) are
# collected in loss_per_fold / acc_per_fold for the summary cell further down.
acc_per_fold = []
loss_per_fold = []
fold_no = 1
# Fixed random_state so the shuffled fold assignment is identical on every run.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
for train, test in skf.split(input_datas, labels):
    model = ann.simple_model(input_datas.shape[1:])
    model.compile(loss="binary_crossentropy",
                optimizer="adam",
                metrics=["accuracy","auc"],
                )
    # Override the optimizer's default learning rate after compilation.
    model.optimizer.learning_rate = 0.01
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    history = model.fit(
        input_datas[train],
        labels[train],
        epochs=50,
        verbose = 2,
    )

    # evaluate() returns the loss followed by the compiled metrics, in the
    # order passed to compile(): [loss, accuracy, auc].
    scores = model.evaluate(input_datas[test], labels[test], verbose=0)
    fold_loss, fold_acc = scores[0], scores[1]
    print(f'Score for fold {fold_no}: loss of {fold_loss}; accuracy of {fold_acc * 100}%')
    # Store scalars only, so np.mean over the lists is a mean over folds.
    loss_per_fold.append(fold_loss)
    acc_per_fold.append(fold_acc * 100)
    fold_no = fold_no + 1

In [None]:
# Score the model left over from the last cross-validation fold on the whole
# dataset (this includes the samples that model was trained on).
score = model.evaluate(x=input_datas, y=labels, verbose=1)

In [None]:
# Show the second entry of the model's metrics_names list; as the bare last
# expression of the cell, its value is rendered as the cell output.
model.metrics_names[1]

In [None]:
# == Provide average scores ==
# Print the loss/accuracy recorded for each fold, then their averages.
# If nothing was recorded, say so instead of printing nan (np.mean of an empty
# list is nan and emits a RuntimeWarning).
if not acc_per_fold or not loss_per_fold:
    print("No per-fold scores recorded: run the cross-validation cell with fold evaluation first.")
else:
    print("------------------------------------------------------------------------")
    print("Score per fold")
    for i in range(0, len(acc_per_fold)):
        print("------------------------------------------------------------------------")
        print(f"> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%")
    print("------------------------------------------------------------------------")
    print("Average scores for all folds:")
    print(f"> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})")
    print(f"> Loss: {np.mean(loss_per_fold)}")
    print("------------------------------------------------------------------------")

In [None]:
# Model outputs for every row of the feature matrix; thresholded in the cells below.
predicts = model.predict(x=input_datas)

In [None]:
import sklearn.metrics as sk_metrics

In [None]:
def best_accuracy(y_true, y_prob):
    """Find the threshold that maximizes accuracy on the training set.

    Scans 101 evenly spaced thresholds in [0, 1]. For each one, a sample is
    assigned class 1 when its score is >= the threshold, and the resulting
    predictions are scored with sklearn's accuracy_score.

    Args:
        y_true: ground-truth 0/1 labels.
        y_prob: predicted scores, compared element-wise against each threshold.

    Returns:
        Tuple (best_acc, best_threshold). On ties the lowest threshold wins;
        if no threshold scores above 0.0 the result is (0.0, 0.5).
    """
    winner = (0.0, 0.5)
    for cutoff in np.linspace(0, 1, 101):
        score = sk_metrics.accuracy_score(y_true, (y_prob >= cutoff).astype(int))
        # Strict comparison keeps the first (lowest) threshold among equals.
        if score > winner[0]:
            winner = (score, cutoff)
    return winner

In [None]:
def equal_accuracy(y_true, y_prob):
    """Find the threshold that minimizes the accuracy gap between the two classes.

    Scans 101 evenly spaced thresholds in [0, 1]. At each one the per-class
    accuracies are computed from the confusion matrix: tn / (tn + fp) for
    class 0 (AGN) and tp / (tp + fn) for class 1 (Pulsar).

    Args:
        y_true: ground-truth 0/1 labels.
        y_prob: predicted scores; a sample is class 1 when score >= threshold.

    Returns:
        Tuple (best_agn, best_psr, best_threshold): the two per-class
        accuracies at the selected threshold, and the threshold itself.
        If no threshold gives a gap below 1.0 the result is (0, 0, 0.5).
    """
    thresholds = np.linspace(0, 1, 101)
    best_threshold = 0.5
    min_diff = 1.0
    best_agn = 0
    best_psr = 0
    for t in thresholds:
        y_pred = (y_prob >= t).astype(int)
        # labels=[0, 1] forces a 2x2 matrix even when only one class appears in
        # both y_true and y_pred; without it the 4-way unpacking below raises.
        cm = sk_metrics.confusion_matrix(y_true, y_pred, labels=[0, 1])
        tn, fp, fn, tp = cm.ravel()
        # A class with no true samples gets accuracy 0 rather than dividing by zero.
        acc_agn = tn / (tn + fp) if (tn + fp) > 0 else 0
        acc_psr = tp / (tp + fn) if (tp + fn) > 0 else 0
        diff = abs(acc_agn - acc_psr)
        # Strict comparison keeps the first (lowest) threshold among equal gaps.
        if diff < min_diff:
            min_diff = diff
            best_agn = acc_agn
            best_psr = acc_psr
            best_threshold = t
    return best_agn, best_psr, best_threshold

In [None]:
def f1_score_t(y_true, y_prob, threshold):
    """Compute the F1 score of class 1 at a given decision threshold.

    Args:
        y_true: ground-truth 0/1 labels.
        y_prob: predicted scores; a sample is class 1 when score >= threshold.
        threshold: decision threshold applied to y_prob.

    Returns:
        tp / (tp + 0.5 * (fp + fn)), or 0.0 when there are no true positives,
        false positives or false negatives at all (the ratio would be 0/0).
    """
    y_pred = (y_prob >= threshold).astype(int)
    # labels=[0, 1] guarantees a 2x2 matrix so the unpacking below always works.
    cm = sk_metrics.confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    denominator = tp + 0.5 * (fp + fn)
    # No positives in either truth or prediction: return 0.0 instead of nan.
    if denominator == 0:
        return 0.0
    return tp / denominator

In [None]:
def f1_score(y_true, y_prob):
    """Find the threshold that maximizes the F1 score of class 1.

    Scans 101 evenly spaced thresholds in [0, 1] and scores each one with
    f1_score_t, so the F1 formula is defined in a single place.

    Args:
        y_true: ground-truth 0/1 labels.
        y_prob: predicted scores; a sample is class 1 when score >= threshold.

    Returns:
        Tuple (best_f1, best_threshold). On ties the lowest threshold wins;
        if no threshold scores above 0.0 the result is (0.0, 0.5).
    """
    best_f1 = 0.0
    best_threshold = 0.5
    for t in np.linspace(0, 1, 101):
        f1 = f1_score_t(y_true, y_prob, t)
        # Strict comparison: an undefined (nan) or zero score never replaces the best.
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = t
    return best_f1, best_threshold

In [None]:
# Per-class accuracy at a fixed decision threshold of 0.003.
y_pred = (predicts >= 0.003).astype(int)
# Compare against `labels` directly: `y_true` is only assigned in a later cell,
# so using it here raises NameError when the notebook is run top to bottom.
# labels=[0, 1] guarantees a 2x2 matrix for the 4-way unpacking below.
cm = sk_metrics.confusion_matrix(labels, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()
# Class 0 (AGN) accuracy and class 1 (Pulsar) accuracy; 0 if a class has no samples.
acc_agn = tn / (tn + fp) if (tn + fp) > 0 else 0
acc_psr = tp / (tp + fn) if (tp + fn) > 0 else 0
print(acc_agn,acc_psr)

In [None]:
# Print the result tuple of each threshold search, one per line:
# best accuracy, equalized per-class accuracy, best F1.
print(
    best_accuracy(labels, predicts),
    equal_accuracy(labels, predicts),
    f1_score(labels, predicts),
    sep="\n",
)


In [None]:
# Difference between the accuracy at a fixed 0.8 threshold and the best
# accuracy found by the threshold scan (zero or negative when 0.8 is not optimal).
y_true = labels
y_pred = np.where(predicts >= 0.8, 1, 0)
acc = sk_metrics.accuracy_score(y_true, y_pred)
print(acc - best_accuracy(y_true, predicts)[0])