In [None]:
# Environment bootstrap. On Google Colab: check that a GPU is visible, clone the
# project, install its requirements and run the FITS-to-CSV script. On a local
# machine: only extend the import path. In both branches `custom_paths` ends up
# bound to the project's `custom_variables` module.
import sys
# The branch is chosen by whether the `google.colab` module is already loaded.
if "google.colab" in sys.modules:
    print("Colab Detected")
    # TensorFlow is imported here only to list the physical GPU devices.
    import tensorflow as tf
    gpus = tf.config.list_physical_devices("GPU")
    if not gpus:
        # Fail fast: nothing is cloned or installed when no GPU is attached.
        raise RuntimeError("Nessuna GPU trovata.Controlla di aver selezionato il runtime giusto.")
    else:
        print(f"Trovate {len(gpus)} GPU:\n{gpus}")
    
    # Fetch the repository, move into it and install its pinned requirements.
    !git clone https://github.com/AtomicDonuts/Progetto_Computings.git
    %cd Progetto_Computings/
    %pip install -q -r requirements.txt
    # NOTE(review): presumably produces the CSV later read through
    # custom_paths.csv_path - confirm against fits_import/fits2csv.py.
    !python3 fits_import/fits2csv.py
    
    # Relative to the repository root entered by the %cd above.
    sys.path.append("imports/")
    import custom_variables as custom_paths
else:
    print("Local Machine Detected")
    # Relative path: resolved against the kernel's current working directory.
    # NOTE(review): assumes the notebook is run from a sub-folder of the repo - confirm.
    sys.path.append("../imports/")
    import custom_variables as custom_paths

In [None]:
from keras.layers import Dense, Input, Concatenate,Flatten, Dropout
from keras.models import Model
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
# Load the catalogue CSV and keep only the two classes the classifier separates.
df = pd.read_csv(custom_paths.csv_path)
# .copy() makes the filtered frame independent of the one read from disk, so
# adding columns to `df` afterwards does not raise pandas' SettingWithCopyWarning.
df = df[df["CLASS_GENERIC"].isin(["AGN", "Pulsar"])].copy()
# Number of rows left after the class filter.
print(len(df))

In [None]:
# Add one 0/1 indicator column per spectrum type, each named after the
# "SpectrumType" value it flags (1 where the row has that type, 0 otherwise).
for spectrum_type in ("PowerLaw", "LogParabola", "PLSuperExpCutoff"):
    df[spectrum_type] = np.where(df["SpectrumType"] == spectrum_type, 1, 0)

In [None]:
# Columns of `df` used as model inputs; the same list is scanned for infinite
# values before the feature table is built.
norm_cols = [
    "GLAT",
    "PowerLaw",
    "LogParabola",
    "PLSuperExpCutoff",
    "Variability_Index",
]

In [None]:
# Remove every row that holds an infinite value in any of the feature columns.
has_inf = np.isinf(df[norm_cols]).any(axis=1).to_numpy()
# Index labels of the offending rows (empty when the data is clean).
drop = df.index[has_inf]
if len(drop) > 0:
    print(f"Dropping: {drop}")
    df = df.drop(index=drop)

In [None]:
# I don't think normalizing the columns is necessary for this kind of data, so the scaling below is left disabled.

# scaler = StandardScaler()
# scaler.fit(df[norm_cols])
# scaled_data = scaler.transform(df[norm_cols])
# df[norm_cols] = scaled_data

In [None]:
# Feature table handed to the model: the `norm_cols` columns of `df`.
input_datas = df.loc[:, norm_cols]

In [None]:
# Binary targets: AGN -> 1, Pulsar -> 0. Each class is assigned explicitly
# (rather than casting a mask) so the encoding is easy to change later.
class_names = df["CLASS_GENERIC"].to_numpy()
is_agn = class_names == "AGN"
is_psr = class_names == "Pulsar"
labels = np.zeros(len(df), dtype=int)
labels[is_psr] = 0
labels[is_agn] = 1

In [None]:
# Stratified 10-fold split of the feature table, preserving the class ratio of
# `labels` in every fold.
# random_state pins the shuffle so the folds are identical on every
# Restart & Run All.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
# split() returns a one-shot generator; materialising it as a list of
# (train_index, test_index) pairs lets later cells iterate the folds more than
# once, e.g. when a cell is re-run.
skf_cose = list(skf.split(input_datas, labels))

In [None]:
# Show, for each fold, the positional indices assigned to train and test.
for i, (train_index, test_index) in enumerate(skf_cose):
    print(
        f"Fold {i}:",
        f"  Train: index={train_index}",
        f"  Test:  index={test_index}",
        sep="\n",
    )

In [None]:
# Fully connected binary classifier: four ReLU Dense layers (16-32-16-4 units),
# with 20% dropout after each of the first three, and a single sigmoid output.
model_name = "Modello_Professore"
# Named `inputs` rather than `input` so the builtin input() is not shadowed for
# the rest of the kernel session.
inputs = Input(shape=input_datas.shape[1:], name="Inputs")
hidden = Dense(16, activation="relu", name="Dense_1")(inputs)
hidden = Dropout(0.2, name="Dropout_1")(hidden)
hidden = Dense(32, activation="relu", name="Dense_2")(hidden)
hidden = Dropout(0.2, name="Dropout_2")(hidden)
hidden = Dense(16, activation="relu", name="Dense_3")(hidden)
hidden = Dropout(0.2, name="Dropout_3")(hidden)
hidden = Dense(4, activation="relu", name="Dense_4")(hidden)
output = Dense(1, activation="sigmoid", name="Output")(hidden)

model_prof = Model(inputs=inputs, outputs=output, name=model_name)
model_prof.compile(loss="binary_crossentropy", optimizer="adam")
# Override the learning rate of the optimizer created by compile().
model_prof.optimizer.learning_rate = 0.01


# Print the layer table and write the architecture diagram to "<model_name>.png".
model_prof.summary()
plot_model(
    model_prof, to_file=f"{model_name}.png", show_shapes=True, show_layer_names=True
)

In [None]:
# Both callbacks watch the validation loss: the learning rate is halved after 5
# epochs without improvement, and training stops after 10, restoring the best
# weights seen.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", patience=5, factor=0.5)
early_stopping = EarlyStopping(
    monitor="val_loss",
    restore_best_weights=True,
    patience=10,
)

# batch_size is left at the Keras default. Half of the rows are held out for
# validation.
# NOTE(review): Keras takes the validation_split fraction from the end of the
# data, before shuffling - confirm this split is representative of both classes.
history = model_prof.fit(
    x=input_datas,
    y=labels,
    epochs=300,
    validation_split=0.5,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Training and validation loss per epoch, drawn on the current axes.
loss_axes = plt.gca()
for curve_name in ("loss", "val_loss"):
    loss_axes.plot(history.history[curve_name], label=curve_name)
loss_axes.set_title("Loss history")
loss_axes.set_ylabel("Loss value")
loss_axes.set_xlabel("No. epoch")
loss_axes.legend()
plt.show()

In [None]:
# Saving is switched off; set the flag to True to write "<model_name>.keras".
SAVE_MODEL = False
if SAVE_MODEL:
    model_prof.save(f"{model_name}.keras")

In [None]:
# Score every row of the feature table; these are the same rows that were
# passed to fit(), so the scores are not an independent test-set evaluation.
predictions = model_prof.predict(x=input_datas)

In [None]:
# Confident predictions only: a score above 0.8 counts as AGN, below 0.2 as
# Pulsar; anything in between matches neither mask and is left out of the matrix.
scores = predictions[:, 0]
mask_predicted_agn = scores > 0.8
mask_predicted_psr = scores < 0.2
# Row 0 = AGN, row 1 = Pulsar, in both the true-class and predicted-class stacks.
mat_h = np.stack((is_agn, is_psr))
mat_v = np.stack((mask_predicted_agn, mask_predicted_psr))
# Broadcast (2, 1, N) against (1, 2, N): entry [i, j, k] is True when sample k
# truly belongs to class i and is predicted as class j.
mat_vectorized = np.logical_and(mat_h[:, np.newaxis, :], mat_v[np.newaxis, :, :])
# Counting along the sample axis gives the 2x2 matrix (true class x predicted class).
confusion_matrix = mat_vectorized.sum(axis=-1)

In [None]:
# Summary metrics over the confidently classified samples. Pulsar is treated as
# the positive class: confusion_matrix rows are the true class (AGN, Pulsar) and
# columns the predicted class (AGN, Pulsar).
TOT = len(predictions)
TN = confusion_matrix[0, 0]  # AGN predicted as AGN
FP = confusion_matrix[0, 1]  # AGN predicted as Pulsar
FN = confusion_matrix[1, 0]  # Pulsar predicted as AGN
TP = confusion_matrix[1, 1]  # Pulsar predicted as Pulsar
# Samples that landed in the matrix vs. those that matched neither prediction mask.
classified = TN + FP + FN + TP
unclassified = TOT - classified
print(
    f"Data under the cutoff: {unclassified} ie {np.round((unclassified / TOT) * 100, 2)}%"
)
# Per-class rates are computed over the classified samples only.
print(f"AGN Accuracy: {TN/(TN+FP)}")
print(f"PSR Accuracy: {TP/(TP+FN)}")
print(f"Total Accuracy: {(TP + TN) / classified}")
print(f"F1 Score: {TP/(TP + 0.5*(FP+FN))}")

In [None]:
# Display the 2x2 count matrix: rows are the true class (AGN, Pulsar),
# columns the predicted class (AGN, Pulsar).
confusion_matrix