In [15]:
import imblearn
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, log_loss, roc_auc_score, RocCurveDisplay, roc_curve
import wandb
from wandb.sklearn import plot_precision_recall, plot_feature_importances
from wandb.sklearn import plot_class_proportions, plot_learning_curve, plot_roc
import numpy as np
import pandas as pd
from IMPJ import DataProcessor
import random
import matplotlib.pyplot as plt
from os.path import basename, exists
import matplotlib as mpl
import random
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.multiclass import OneVsRestClassifier

dp = DataProcessor()

def download(url):
    filename = basename(url)
    if not exists(filename):
        from urllib.request import urlretrieve
        local, _ = urlretrieve(url, filename)
        print('Downloaded ' + local)
        
download('https://github.com/AllenDowney/AstronomicalData/raw/main/' + 'az-paper-twocol.mplstyle')
plt.style.use('./az-paper-twocol.mplstyle')

In [16]:
test_size = 0.3
    
filepath = '..\\Fermi-LAT Data Subsets\\chiaro_12.csv'
data = pd.read_csv(filepath, index_col=0)
data = data.dropna()
Y = data['CLASS1']
X = data.drop(labels='CLASS1',axis=1)
#X,Y = dp.choose_2_vars('CLASS1',data,1,2)

In [33]:
sm = SMOTE(random_state=42)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=test_size,random_state=2) 
scaler = StandardScaler()  
scaler.fit(x_train)  
x_train = scaler.transform(x_train)  
x_test = scaler.transform(x_test)  

print(f'Original Dataset Shape: {Counter(y_train)}')
x_train_res, y_train_res = sm.fit_resample(x_train, y_train)
print(f'Resampled dataset shape {Counter(y_train_res)}')

for i in range(0,len(y_test)):
    if np.array(y_test)[i] == 3:
        print(np.array(y_test)[i])

Original Dataset Shape: Counter({1: 1043, 2: 575, 3: 34})
Resampled dataset shape Counter({1: 1043, 2: 1043, 3: 1043})
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3


In [34]:
NN = MLPClassifier(activation='tanh',hidden_layer_sizes=(50,50),learning_rate='constant',solver='sgd',
                   max_iter=5000,random_state=5)
BNN = BaggingClassifier(estimator=NN,n_estimators=20,bootstrap=True)
ovr = OneVsRestClassifier(estimator=BNN, n_jobs=-1)
ovr.fit(x_train_res,y_train_res)
y_pred = ovr.predict(x_test)
y_proba = ovr.predict_proba(x_test)


In [42]:
print(y_pred)

[1 1 2 2 1 1 2 1 2 1 2 1 3 2 1 2 1 1 1 1 2 1 1 1 1 3 1 1 1 1 1 1 1 2 1 1 2
 2 1 1 1 3 1 1 1 1 1 1 2 1 2 2 2 2 1 1 1 2 1 1 1 2 1 1 2 2 1 2 1 1 1 1 2 1
 1 1 1 1 2 2 1 2 1 2 2 2 1 3 1 1 1 1 1 1 1 2 2 1 1 2 2 2 2 1 2 1 2 1 1 1 1
 1 1 2 2 2 1 2 1 1 1 2 1 1 2 1 2 1 2 2 1 1 1 2 3 1 1 2 1 2 1 2 1 1 1 1 2 1
 1 2 1 1 1 1 2 1 2 1 1 2 2 1 1 2 2 2 1 1 3 1 1 3 2 1 1 1 1 1 1 1 3 3 2 2 2
 1 1 3 3 2 2 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 2 3 1 1 1 1 1
 1 1 2 2 1 1 2 1 2 2 2 1 3 1 1 2 2 3 1 1 1 2 2 1 2 1 3 2 1 1 1 1 1 1 2 1 1
 2 2 1 1 2 1 1 2 2 1 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 2 1 1 1 1 1 2 1 1 3 1 2
 1 1 2 1 3 1 1 1 2 1 1 2 1 1 1 2 2 3 1 3 1 3 1 1 2 2 1 1 1 1 1 2 1 2 1 1 1
 1 2 3 1 1 2 1 1 2 1 1 1 1 1 2 1 2 1 3 1 1 2 1 2 1 1 1 1 3 1 1 2 2 2 1 3 1
 1 1 2 1 1 2 2 1 1 1 1 2 3 2 1 2 1 1 1 1 1 1 3 1 2 1 1 1 2 1 1 1 1 2 1 2 3
 2 1 1 1 2 1 2 2 1 1 1 2 2 2 1 1 2 2 1 1 2 2 1 1 2 1 3 1 2 3 1 2 1 2 2 2 1
 2 2 1 1 1 2 1 2 1 2 2 2 1 2 2 1 1 2 2 1 1 1 3 1 2 1 3 2 3 1 1 2 1 2 2 2 1
 2 1 1 1 2 1 2 1 2 2 1 1 

In [40]:
print(np.unique(y_train))

''' SCORING METRICS '''

accuracy = accuracy_score(y_test, y_pred)*100
confusion = confusion_matrix(y_test, y_pred)
#roc_auc = roc_auc_score(y_test,y_pred) 
#roc_auc_weightedavg = roc_auc_score(y_test,y_pred,average='weighted')
#logloss = log_loss(y_test,y_pred)
#f1 = f1_score(y_test,y_pred)
#fpr, tpr, thresholds = roc_curve(y_test,y_proba.T[1])

print('The Neural Network accuracy is ' + str(accuracy))
#print('The Neural Network ROC AUC Scores are: '+str(roc_auc))
#print("The Neural Network's Weighted Average ROC AUC Score is: " + str(roc_auc_weightedavg))
#print("The Neural Network's Logarithmic Loss Score is: " + str(logloss))
#print('The Neural Network F1 Score is: '+str(f1))
#print('The Neural Network Confusion Matrix is:')
print(confusion)

[1 2 3]
The Neural Network accuracy is 82.06214689265536
[[372  31  44]
 [ 29 207   6]
 [ 13   4   2]]
