# Neural Network Classifier Testing

In [9]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE 
from imblearn.under_sampling import NearMiss 
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import confusion_matrix, classification_report

In [10]:
xTestData = np.genfromtxt("data/X_test.txt", delimiter = None, skip_header=1)
xTrainData = np.genfromtxt("data/X_train.txt", delimiter = None, skip_header=1)
yTrainData = np.genfromtxt("data/Y_train.txt", delimiter = None, skip_header=1)

In [11]:
xTrain, xVal, yTrain, yVal = train_test_split(xTrainData, yTrainData, test_size=0.25, shuffle = True)

In [12]:
yTrainDist = yTrain[:, 1]
yTrainZ = yTrain[:, 0]
yValDist = yVal[:,1]
yValZ = yVal[:,0]

xTestIds = xTestData[:,0]

#OverSampler
sm = SMOTE(random_state = 2)

#UnderSampler
nr = NearMiss()

**Helper method for printing Score for KNN(1-50 neighbors)**

In [13]:
def printScores(xTrain, yTrain, xVal):
    scale = StandardScaler().fit(xTrain)
    xTrainScaled = scale.transform(xTrain)
    xValScaled = scale.transform(xVal)

    activationTypes = ["identity", "logistic", "tanh", "relu"] # Logistic is sigmoid
    
    for activate in activationTypes:
        clf = MLPClassifier(activation=activate,hidden_layer_sizes=(2), random_state=1, max_iter = 2000, learning_rate_init=10)
        clf.fit(xTrainScaled, yTrain.ravel())
        perc = "{:.2%}".format(clf.score(xValScaled, yValDist))
        yPred = clf.predict(xValScaled)
        print(activate + ": " + str(perc) + "\n" + str(classification_report(yValDist, yPred)) + "\n" + str(confusion_matrix(yValDist, yPred)) + "\n")


In [23]:
def printScoresMore(xTrain, yTrain, xVal):
    scale = StandardScaler().fit(xTrain)
    xTrainScaled = scale.transform(xTrain)
    xValScaled = scale.transform(xVal)

    activationTypes = ["identity", "logistic", "tanh", "relu"] # Logistic is sigmoid
    
    for activate in activationTypes:
        for i in range(2,25, 1):
            clf = MLPClassifier(activation=activate,hidden_layer_sizes=(i), random_state=1, max_iter = 2000, learning_rate_init=11)
            clf.fit(xTrainScaled, yTrain.ravel())
            perc = "{:.2%}".format(clf.score(xValScaled, yValDist))
            yPred = clf.predict(xValScaled)
            tn, fp, fn, tp = confusion_matrix(yValDist, yPred).ravel()
            if (fn > 20 and tp > 20 and tn > 20 and fp <= 100):
                print(activate + " and " + str(i) + "neurons: " + str(perc) + "\n" + str(classification_report(yValDist, yPred)) + "\n" + str(confusion_matrix(yValDist, yPred)) + "\n")
                print("tn: " + str(tn))
                print("fp: " + str(fp))
                print("fn: " + str(fn))
                print("tp: " + str(tp))
                print("\n")
                perc = "{:.2%}".format(clf.score(xTrainScaled, yTrain))
                print(perc)

# Important Features

In [17]:
# xTrainImp = xTrain[:,[1,4,5,11,13,8,14]]
# xValImp = xVal[:,[1,4,5,11,13,8,14]]
xTrainImp = xTrain[:,[11]]
xValImp = xVal[:,[11]]
#xTestImp = xTestData[:,[1,4,5,11,13,8,14]]
xTestImp = xTestData[:,[11]]

In [24]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainImp, yTrainDist.ravel())

printScoresMore(xTrainBal, yTrainBal, xValImp)

identity and 2neurons: 93.13%
              precision    recall  f1-score   support

         0.0       0.98      0.95      0.96      1136
         1.0       0.36      0.55      0.44        58

    accuracy                           0.93      1194
   macro avg       0.67      0.75      0.70      1194
weighted avg       0.95      0.93      0.94      1194

[[1080   56]
 [  26   32]]

tn: 1080
fp: 56
fn: 26
tp: 32


72.53%
identity and 9neurons: 90.79%
              precision    recall  f1-score   support

         0.0       0.98      0.92      0.95      1136
         1.0       0.29      0.64      0.40        58

    accuracy                           0.91      1194
   macro avg       0.64      0.78      0.68      1194
weighted avg       0.95      0.91      0.92      1194

[[1047   89]
 [  21   37]]

tn: 1047
fp: 89
fn: 21
tp: 37


72.18%
identity and 15neurons: 94.64%
              precision    recall  f1-score   support

         0.0       0.97      0.97      0.97      1136
         1.0

# Testing Features for Overfit

In [8]:
xTrainFeat = xTrain[:,[1,4,5,11,8,14,22]]
xValFeat = xVal[:,[1,4,5,11,8,14,22]]
xTestFeat = xTestData[:,[1,4,5,11,8,14,22]]

In [9]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainFeat, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValFeat)

identity: 91.37%
              precision    recall  f1-score   support

         0.0       0.99      0.92      0.95      1138
         1.0       0.33      0.80      0.47        56

    accuracy                           0.91      1194
   macro avg       0.66      0.86      0.71      1194
weighted avg       0.96      0.91      0.93      1194

[[1046   92]
 [  11   45]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 98.49%
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99      1138
         1.0       0.83      0.86      0.84        56

    accuracy                           0.98      1194
   macro avg       0.91      0.92      0.92      1194
weighted avg       0.99      0.98      0.99      1194

[[1128   10]
 [   8   48]]

relu: 98.83%
              precision    recall  f1-score   support

         0.0       1.00      0.99      0.99      1138
         1.0       0.84      0.93      0.88        56

    accuracy                           0.99      1194
   macro avg       0.92      

# All Features

**Features**

In [10]:
xTrainAll = xTrain[:, 1:35]
xValAll = xVal[:, 1:35]
xTestAll = xTestData[:, 1:35]

**Unbalanced**

In [11]:
printScores(xTrainAll, yTrainDist, xValAll)

identity: 97.15%
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.99      1138
         1.0       0.79      0.54      0.64        56

    accuracy                           0.97      1194
   macro avg       0.88      0.76      0.81      1194
weighted avg       0.97      0.97      0.97      1194

[[1130    8]
 [  26   30]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.48%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.68      0.48      0.56        56

    accuracy                           0.96      1194
   macro avg       0.82      0.74      0.77      1194
weighted avg       0.96      0.96      0.96      1194

[[1125   13]
 [  29   27]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      

  'precision', 'predicted', average, warn_for)


**OverSampling**

In [12]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainAll, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValAll)

identity: 97.82%
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99      1138
         1.0       0.74      0.82      0.78        56

    accuracy                           0.98      1194
   macro avg       0.87      0.90      0.88      1194
weighted avg       0.98      0.98      0.98      1194

[[1122   16]
 [  10   46]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.73%
              precision    recall  f1-score   support

         0.0       0.99      0.98      0.98      1138
         1.0       0.63      0.71      0.67        56

    accuracy                           0.97      1194
   macro avg       0.81      0.85      0.83      1194
weighted avg       0.97      0.97      0.97      1194

[[1115   23]
 [  16   40]]

relu: 98.16%
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99      1138
         1.0       0.81      0.79      0.80        56

    accuracy                           0.98      1194
   macro avg       0.90      

**UnderSampling**

In [13]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainAll, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValAll)

identity: 91.12%
              precision    recall  f1-score   support

         0.0       0.99      0.91      0.95      1138
         1.0       0.33      0.89      0.49        56

    accuracy                           0.91      1194
   macro avg       0.66      0.90      0.72      1194
weighted avg       0.96      0.91      0.93      1194

[[1038  100]
 [   6   50]]

logistic: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]



  'precision', 'predicted', average, warn_for)


tanh: 94.89%
              precision    recall  f1-score   support

         0.0       1.00      0.95      0.97      1138
         1.0       0.48      0.91      0.63        56

    accuracy                           0.95      1194
   macro avg       0.74      0.93      0.80      1194
weighted avg       0.97      0.95      0.96      1194

[[1082   56]
 [   5   51]]

relu: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]



  'precision', 'predicted', average, warn_for)


# All first Features of each Category

**Features**

In [14]:
xTrainFirsts = xTrain[:,[1,2,3, 4, 10, 20, 23, 27,30]]
xValFirsts = xVal[:,[1,2,3, 4, 10, 20, 23, 27,30]]
xTestFirsts = xTestData[:,[1,2,3, 4, 10, 20, 23, 27,30]]

**Unbalanced Data**

In [15]:
printScores(xTrainFirsts, yTrainDist, xValFirsts)

identity: 97.32%
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.99      1138
         1.0       0.80      0.57      0.67        56

    accuracy                           0.97      1194
   macro avg       0.89      0.78      0.83      1194
weighted avg       0.97      0.97      0.97      1194

[[1130    8]
 [  24   32]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 97.91%
              precision    recall  f1-score   support

         0.0       0.99      0.98      0.99      1138
         1.0       0.73      0.88      0.80        56

    accuracy                           0.98      1194
   macro avg       0.86      0.93      0.89      1194
weighted avg       0.98      0.98      0.98      1194

[[1120   18]
 [   7   49]]

relu: 97.91%
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99      1138
         1.0       0.79      0.75      0.77        56

    accuracy                           0.98      1194
   macro avg       0.89      

**OverSampling**

In [16]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainFirsts, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValFirsts)

identity: 95.14%
              precision    recall  f1-score   support

         0.0       1.00      0.95      0.97      1138
         1.0       0.49      0.96      0.65        56

    accuracy                           0.95      1194
   macro avg       0.74      0.96      0.81      1194
weighted avg       0.97      0.95      0.96      1194

[[1082   56]
 [   2   54]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.57%
              precision    recall  f1-score   support

         0.0       1.00      0.97      0.98      1138
         1.0       0.59      0.91      0.71        56

    accuracy                           0.97      1194
   macro avg       0.79      0.94      0.85      1194
weighted avg       0.98      0.97      0.97      1194

[[1102   36]
 [   5   51]]

relu: 97.32%
              precision    recall  f1-score   support

         0.0       0.99      0.98      0.99      1138
         1.0       0.67      0.84      0.75        56

    accuracy                           0.97      1194
   macro avg       0.83      

**UnderSampling**

In [17]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainFirsts, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValFirsts)

identity: 94.47%
              precision    recall  f1-score   support

         0.0       0.99      0.95      0.97      1138
         1.0       0.45      0.80      0.58        56

    accuracy                           0.94      1194
   macro avg       0.72      0.88      0.77      1194
weighted avg       0.96      0.94      0.95      1194

[[1083   55]
 [  11   45]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 94.05%
              precision    recall  f1-score   support

         0.0       0.99      0.94      0.97      1138
         1.0       0.43      0.89      0.58        56

    accuracy                           0.94      1194
   macro avg       0.71      0.92      0.78      1194
weighted avg       0.97      0.94      0.95      1194

[[1073   65]
 [   6   50]]

relu: 94.14%
              precision    recall  f1-score   support

         0.0       0.99      0.94      0.97      1138
         1.0       0.44      0.89      0.59        56

    accuracy                           0.94      1194
   macro avg       0.72      

# All second Features of each Category

**Features**

In [18]:
xTrainSeconds = xTrain[:,[1,2,3, 5, 11, 21, 24, 28,31]]
xValSeconds = xVal[:,[1,2,3, 5, 11, 21, 24, 28,31]]

**Unbalanced**

In [19]:
printScores(xTrainSeconds, yTrainDist, xValSeconds)

identity: 96.40%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.74      0.36      0.48        56

    accuracy                           0.96      1194
   macro avg       0.85      0.68      0.73      1194
weighted avg       0.96      0.96      0.96      1194

[[1131    7]
 [  36   20]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.98%
              precision    recall  f1-score   support

         0.0       0.99      0.98      0.98      1138
         1.0       0.67      0.70      0.68        56

    accuracy                           0.97      1194
   macro avg       0.83      0.84      0.83      1194
weighted avg       0.97      0.97      0.97      1194

[[1119   19]
 [  17   39]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      

  'precision', 'predicted', average, warn_for)


**OverSampling**

In [20]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainSeconds, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValSeconds)

identity: 90.45%
              precision    recall  f1-score   support

         0.0       0.99      0.91      0.95      1138
         1.0       0.30      0.80      0.44        56

    accuracy                           0.90      1194
   macro avg       0.65      0.86      0.69      1194
weighted avg       0.96      0.90      0.92      1194

[[1035  103]
 [  11   45]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 94.89%
              precision    recall  f1-score   support

         0.0       0.99      0.95      0.97      1138
         1.0       0.48      0.88      0.62        56

    accuracy                           0.95      1194
   macro avg       0.73      0.91      0.79      1194
weighted avg       0.97      0.95      0.96      1194

[[1084   54]
 [   7   49]]

relu: 96.48%
              precision    recall  f1-score   support

         0.0       0.99      0.97      0.98      1138
         1.0       0.59      0.82      0.69        56

    accuracy                           0.96      1194
   macro avg       0.79      

**UnderSampling**

In [21]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainSeconds, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValSeconds)

identity: 86.35%
              precision    recall  f1-score   support

         0.0       0.99      0.86      0.92      1138
         1.0       0.23      0.84      0.37        56

    accuracy                           0.86      1194
   macro avg       0.61      0.85      0.64      1194
weighted avg       0.96      0.86      0.90      1194

[[984 154]
 [  9  47]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 74.20%
              precision    recall  f1-score   support

         0.0       0.99      0.74      0.85      1138
         1.0       0.13      0.82      0.23        56

    accuracy                           0.74      1194
   macro avg       0.56      0.78      0.54      1194
weighted avg       0.95      0.74      0.82      1194

[[840 298]
 [ 10  46]]

relu: 61.06%
              precision    recall  f1-score   support

         0.0       0.99      0.60      0.75      1138
         1.0       0.09      0.84      0.17        56

    accuracy                           0.61      1194
   macro avg       0.54      0.72

# All third features of each Category

**Features**

In [22]:
xTrainThirds = xTrain[:,[1,2,3, 6, 12, 22, 25, 29,32]]
xValThirds = xVal[:,[1,2,3, 6, 12, 22, 25, 29,32]]

**Unbalanced**

In [23]:
printScores(xTrainThirds, yTrainDist, xValThirds)

identity: 95.23%
              precision    recall  f1-score   support

         0.0       0.96      0.99      0.98      1138
         1.0       0.46      0.11      0.17        56

    accuracy                           0.95      1194
   macro avg       0.71      0.55      0.57      1194
weighted avg       0.93      0.95      0.94      1194

[[1131    7]
 [  50    6]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.06%
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98      1138
         1.0       0.59      0.54      0.56        56

    accuracy                           0.96      1194
   macro avg       0.78      0.76      0.77      1194
weighted avg       0.96      0.96      0.96      1194

[[1117   21]
 [  26   30]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      

  'precision', 'predicted', average, warn_for)


**OverSampling**

In [24]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainThirds, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValThirds)

identity: 82.83%
              precision    recall  f1-score   support

         0.0       0.98      0.84      0.90      1138
         1.0       0.17      0.68      0.27        56

    accuracy                           0.83      1194
   macro avg       0.58      0.76      0.59      1194
weighted avg       0.94      0.83      0.87      1194

[[951 187]
 [ 18  38]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 95.14%
              precision    recall  f1-score   support

         0.0       0.99      0.96      0.97      1138
         1.0       0.49      0.79      0.60        56

    accuracy                           0.95      1194
   macro avg       0.74      0.87      0.79      1194
weighted avg       0.97      0.95      0.96      1194

[[1092   46]
 [  12   44]]

relu: 92.46%
              precision    recall  f1-score   support

         0.0       0.98      0.94      0.96      1138
         1.0       0.35      0.70      0.46        56

    accuracy                           0.92      1194
   macro avg       0.67      

**UnderSampling**

In [25]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainThirds, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValThirds)

identity: 83.42%
              precision    recall  f1-score   support

         0.0       0.98      0.84      0.91      1138
         1.0       0.18      0.71      0.29        56

    accuracy                           0.83      1194
   macro avg       0.58      0.78      0.60      1194
weighted avg       0.95      0.83      0.88      1194

[[956 182]
 [ 16  40]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 81.16%
              precision    recall  f1-score   support

         0.0       0.99      0.81      0.89      1138
         1.0       0.18      0.82      0.29        56

    accuracy                           0.81      1194
   macro avg       0.58      0.82      0.59      1194
weighted avg       0.95      0.81      0.86      1194

[[923 215]
 [ 10  46]]

relu: 76.05%
              precision    recall  f1-score   support

         0.0       0.98      0.76      0.86      1138
         1.0       0.13      0.75      0.23        56

    accuracy                           0.76      1194
   macro avg       0.56      0.76

# Controls and Liquidity Ratios

**Features**

In [26]:
xTrainLR = xTrain[:,[1,2,3, 4, 5, 6, 7, 8,9]]
xValLR = xVal[:,[1,2,3, 4, 5, 6, 7, 8,9]]
xTestLR = xTrainData[:,[1,2,3, 4, 5, 6, 7, 8,9]]

**Unbalanced**

In [27]:
printScores(xTrainLR, yTrainDist, xValLR)

identity: 95.98%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.67      0.29      0.40        56

    accuracy                           0.96      1194
   macro avg       0.82      0.64      0.69      1194
weighted avg       0.95      0.96      0.95      1194

[[1130    8]
 [  40   16]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.57%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.71      0.45      0.55        56

    accuracy                           0.97      1194
   macro avg       0.84      0.72      0.77      1194
weighted avg       0.96      0.97      0.96      1194

[[1128   10]
 [  31   25]]

relu: 96.31%
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98      1138
         1.0       0.61      0.61      0.61        56

    accuracy                           0.96      1194
   macro avg       0.79      

**OverSampling**

In [28]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainLR, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValLR)

identity: 86.68%
              precision    recall  f1-score   support

         0.0       0.99      0.87      0.93      1138
         1.0       0.23      0.80      0.36        56

    accuracy                           0.87      1194
   macro avg       0.61      0.84      0.64      1194
weighted avg       0.95      0.87      0.90      1194

[[990 148]
 [ 11  45]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 90.70%
              precision    recall  f1-score   support

         0.0       0.99      0.91      0.95      1138
         1.0       0.30      0.77      0.44        56

    accuracy                           0.91      1194
   macro avg       0.65      0.84      0.69      1194
weighted avg       0.96      0.91      0.93      1194

[[1040   98]
 [  13   43]]

relu: 93.89%
              precision    recall  f1-score   support

         0.0       0.99      0.95      0.97      1138
         1.0       0.41      0.71      0.52        56

    accuracy                           0.94      1194
   macro avg       0.70      

**UnderSampling**

In [29]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainLR, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValLR)

identity: 78.64%
              precision    recall  f1-score   support

         0.0       0.98      0.79      0.88      1138
         1.0       0.14      0.68      0.23        56

    accuracy                           0.79      1194
   macro avg       0.56      0.74      0.55      1194
weighted avg       0.94      0.79      0.85      1194

[[901 237]
 [ 18  38]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 43.72%
              precision    recall  f1-score   support

         0.0       0.97      0.42      0.59      1138
         1.0       0.06      0.77      0.11        56

    accuracy                           0.44      1194
   macro avg       0.52      0.59      0.35      1194
weighted avg       0.93      0.44      0.57      1194

[[479 659]
 [ 13  43]]

relu: 44.14%
              precision    recall  f1-score   support

         0.0       0.97      0.43      0.59      1138
         1.0       0.06      0.73      0.11        56

    accuracy                           0.44      1194
   macro avg       0.51      0.58

# Controls and Profitability Ratios

**Features**

In [30]:
xTrainPR = xTrain[:,[1, 2, 3, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]
xValPR = xVal[:,[1, 2, 3, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]

**Unbalanced**

In [31]:
printScores(xTrainPR, yTrainDist, xValPR)

identity: 95.48%
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98      1138
         1.0       0.58      0.12      0.21        56

    accuracy                           0.95      1194
   macro avg       0.77      0.56      0.59      1194
weighted avg       0.94      0.95      0.94      1194

[[1133    5]
 [  49    7]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 95.56%
              precision    recall  f1-score   support

         0.0       0.96      0.99      0.98      1138
         1.0       0.57      0.21      0.31        56

    accuracy                           0.96      1194
   macro avg       0.77      0.60      0.64      1194
weighted avg       0.94      0.96      0.95      1194

[[1129    9]
 [  44   12]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      

  'precision', 'predicted', average, warn_for)


**OverSampling**

In [32]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainPR, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValPR)

identity: 85.76%
              precision    recall  f1-score   support

         0.0       0.98      0.87      0.92      1138
         1.0       0.19      0.62      0.29        56

    accuracy                           0.86      1194
   macro avg       0.58      0.75      0.61      1194
weighted avg       0.94      0.86      0.89      1194

[[989 149]
 [ 21  35]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 90.12%
              precision    recall  f1-score   support

         0.0       0.98      0.91      0.95      1138
         1.0       0.28      0.70      0.40        56

    accuracy                           0.90      1194
   macro avg       0.63      0.80      0.67      1194
weighted avg       0.95      0.90      0.92      1194

[[1037  101]
 [  17   39]]

relu: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0

  'precision', 'predicted', average, warn_for)


**UnderSampling**

In [33]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainPR, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValPR)

identity: 84.92%
              precision    recall  f1-score   support

         0.0       0.98      0.86      0.92      1138
         1.0       0.18      0.61      0.27        56

    accuracy                           0.85      1194
   macro avg       0.58      0.73      0.60      1194
weighted avg       0.94      0.85      0.89      1194

[[980 158]
 [ 22  34]]



  'precision', 'predicted', average, warn_for)


logistic: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]

tanh: 40.37%
              precision    recall  f1-score   support

         0.0       0.97      0.39      0.55      1138
         1.0       0.06      0.75      0.11        56

    accuracy                           0.40      1194
   macro avg       0.51      0.57      0.33      1194
weighted avg       0.93      0.40      0.53      1194

[[440 698]
 [ 14  42]]

relu: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50  

  'precision', 'predicted', average, warn_for)


# Controls and Profitability Ratios Booleans

In [34]:
xTrainPRB = xTrain[:,[1, 2, 3, 13, 14, 15, 16, 19]]
xValPRB = xVal[:,[1, 2, 3, 13, 14, 15, 16, 19]]

**Unbalanced**

In [35]:
printScores(xTrainPRB, yTrainDist, xValPRB)

identity: 95.73%
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98      1138
         1.0       0.69      0.16      0.26        56

    accuracy                           0.96      1194
   macro avg       0.83      0.58      0.62      1194
weighted avg       0.95      0.96      0.94      1194

[[1134    4]
 [  47    9]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]



  'precision', 'predicted', average, warn_for)


tanh: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]



  'precision', 'predicted', average, warn_for)


**OverSampling**

In [36]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainPRB, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValPRB)

identity: 87.19%
              precision    recall  f1-score   support

         0.0       0.98      0.88      0.93      1138
         1.0       0.21      0.61      0.31        56

    accuracy                           0.87      1194
   macro avg       0.59      0.75      0.62      1194
weighted avg       0.94      0.87      0.90      1194

[[1007  131]
 [  22   34]]



  'precision', 'predicted', average, warn_for)


logistic: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]

tanh: 86.43%
              precision    recall  f1-score   support

         0.0       0.98      0.88      0.92      1138
         1.0       0.20      0.62      0.30        56

    accuracy                           0.86      1194
   macro avg       0.59      0.75      0.61      1194
weighted avg       0.94      0.86      0.90      1194

[[997 141]
 [ 21  35]]

relu: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50  

  'precision', 'predicted', average, warn_for)


**UnderSampling**

In [37]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainPRB, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValPRB)

identity: 62.23%
              precision    recall  f1-score   support

         0.0       0.96      0.63      0.76      1138
         1.0       0.06      0.50      0.11        56

    accuracy                           0.62      1194
   macro avg       0.51      0.56      0.44      1194
weighted avg       0.92      0.62      0.73      1194

[[715 423]
 [ 28  28]]



  'precision', 'predicted', average, warn_for)


logistic: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]

tanh: 82.66%
              precision    recall  f1-score   support

         0.0       0.96      0.85      0.90      1138
         1.0       0.10      0.32      0.15        56

    accuracy                           0.83      1194
   macro avg       0.53      0.59      0.53      1194
weighted avg       0.92      0.83      0.87      1194

[[969 169]
 [ 38  18]]

relu: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50  

  'precision', 'predicted', average, warn_for)


# Controls and Profitability Ratios Non-Boolean

In [38]:
xTrainPRNB = xTrain[:,[1, 2, 3, 10, 11, 12, 17, 18]]
xValPRNB = xVal[:,[1, 2, 3, 10, 11, 12, 17, 18]]

**Unbalanced**

In [39]:
printScores(xTrainPRB, yTrainDist, xValPRB)

identity: 95.73%
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98      1138
         1.0       0.69      0.16      0.26        56

    accuracy                           0.96      1194
   macro avg       0.83      0.58      0.62      1194
weighted avg       0.95      0.96      0.94      1194

[[1134    4]
 [  47    9]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]



  'precision', 'predicted', average, warn_for)


tanh: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]



  'precision', 'predicted', average, warn_for)


**OverSampling**

In [40]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainPRNB, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValPRNB)

identity: 84.25%
              precision    recall  f1-score   support

         0.0       0.98      0.85      0.91      1138
         1.0       0.18      0.64      0.28        56

    accuracy                           0.84      1194
   macro avg       0.58      0.75      0.59      1194
weighted avg       0.94      0.84      0.88      1194

[[970 168]
 [ 20  36]]



  'precision', 'predicted', average, warn_for)


logistic: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]

tanh: 86.85%
              precision    recall  f1-score   support

         0.0       0.98      0.88      0.93      1138
         1.0       0.21      0.64      0.31        56

    accuracy                           0.87      1194
   macro avg       0.59      0.76      0.62      1194
weighted avg       0.94      0.87      0.90      1194

[[1001  137]
 [  20   36]]

relu: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.

  'precision', 'predicted', average, warn_for)


**UnderSampling**

In [41]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainPRNB, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValPRNB)

identity: 80.32%
              precision    recall  f1-score   support

         0.0       0.98      0.81      0.89      1138
         1.0       0.15      0.68      0.24        56

    accuracy                           0.80      1194
   macro avg       0.56      0.74      0.57      1194
weighted avg       0.94      0.80      0.86      1194

[[921 217]
 [ 18  38]]



  'precision', 'predicted', average, warn_for)


logistic: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]

tanh: 84.00%
              precision    recall  f1-score   support

         0.0       0.97      0.86      0.91      1138
         1.0       0.15      0.52      0.23        56

    accuracy                           0.84      1194
   macro avg       0.56      0.69      0.57      1194
weighted avg       0.93      0.84      0.88      1194

[[974 164]
 [ 27  29]]

relu: 41.29%
              precision    recall  f1-score   support

         0.0       0.97      0.40      0.56      1138
         1.0       0.06      0.77      0.11        56

    accuracy                           0.41      1194
   macro avg       0.52      0.58 

# Constants and Capital Structure

**Features**

In [42]:
xTrainCS = xTrain[:,[1, 2, 3, 20, 21, 22]]
xValCS = xVal[:,[1, 2, 3, 20, 21, 22]]

**Unbalanced**

In [43]:
printScores(xTrainCS, yTrainDist, xValCS)

identity: 96.15%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.63      0.43      0.51        56

    accuracy                           0.96      1194
   macro avg       0.80      0.71      0.75      1194
weighted avg       0.96      0.96      0.96      1194

[[1124   14]
 [  32   24]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.98%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.79      0.48      0.60        56

    accuracy                           0.97      1194
   macro avg       0.88      0.74      0.79      1194
weighted avg       0.97      0.97      0.97      1194

[[1131    7]
 [  29   27]]

relu: 96.90%
              precision    recall  f1-score   support

         0.0       0.99      0.98      0.98      1138
         1.0       0.65      0.73      0.69        56

    accuracy                           0.97      1194
   macro avg       0.82      

**OverSampling**

In [44]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainCS, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValCS)

identity: 91.46%
              precision    recall  f1-score   support

         0.0       0.99      0.92      0.95      1138
         1.0       0.34      0.86      0.48        56

    accuracy                           0.91      1194
   macro avg       0.67      0.89      0.72      1194
weighted avg       0.96      0.91      0.93      1194

[[1044   94]
 [   8   48]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 69.26%
              precision    recall  f1-score   support

         0.0       0.99      0.68      0.81      1138
         1.0       0.12      0.91      0.22        56

    accuracy                           0.69      1194
   macro avg       0.56      0.80      0.51      1194
weighted avg       0.95      0.69      0.78      1194

[[776 362]
 [  5  51]]

relu: 67.84%
              precision    recall  f1-score   support

         0.0       0.99      0.67      0.80      1138
         1.0       0.12      0.91      0.21        56

    accuracy                           0.68      1194
   macro avg       0.56      0.79

**UnderSampling**

In [45]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainCS, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValCS)

identity: 92.80%
              precision    recall  f1-score   support

         0.0       0.99      0.93      0.96      1138
         1.0       0.37      0.79      0.51        56

    accuracy                           0.93      1194
   macro avg       0.68      0.86      0.73      1194
weighted avg       0.96      0.93      0.94      1194

[[1064   74]
 [  12   44]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 79.31%
              precision    recall  f1-score   support

         0.0       0.99      0.79      0.88      1138
         1.0       0.17      0.86      0.28        56

    accuracy                           0.79      1194
   macro avg       0.58      0.82      0.58      1194
weighted avg       0.95      0.79      0.85      1194

[[899 239]
 [  8  48]]

relu: 50.00%
              precision    recall  f1-score   support

         0.0       0.99      0.48      0.65      1138
         1.0       0.08      0.88      0.14        56

    accuracy                           0.50      1194
   macro avg       0.53      0.68

# New Tests 4/20/2020

**Features**

In [46]:
xTrainNT = xTrain[:,[1,2,3, 4, 5, 6, 7, 8,9,26]]
xValNT = xVal[:,[1,2,3, 4, 5, 6, 7, 8,9,26]]
xTestNT = xTestData[:,[1,2,3, 4, 5, 6, 7, 8,9,26]]

**Unbalanced**

In [47]:
printScores(xTrainNT, yTrainDist, xValNT)

identity: 95.98%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.67      0.29      0.40        56

    accuracy                           0.96      1194
   macro avg       0.82      0.64      0.69      1194
weighted avg       0.95      0.96      0.95      1194

[[1130    8]
 [  40   16]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 96.15%
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      1138
         1.0       0.66      0.38      0.48        56

    accuracy                           0.96      1194
   macro avg       0.81      0.68      0.73      1194
weighted avg       0.96      0.96      0.96      1194

[[1127   11]
 [  35   21]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      

  'precision', 'predicted', average, warn_for)


**OverSampling**

In [48]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainNT, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValNT)

identity: 85.76%
              precision    recall  f1-score   support

         0.0       0.99      0.86      0.92      1138
         1.0       0.22      0.82      0.35        56

    accuracy                           0.86      1194
   macro avg       0.61      0.84      0.64      1194
weighted avg       0.95      0.86      0.89      1194

[[978 160]
 [ 10  46]]



  'precision', 'predicted', average, warn_for)


logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]

tanh: 93.80%
              precision    recall  f1-score   support

         0.0       0.99      0.95      0.97      1138
         1.0       0.41      0.75      0.53        56

    accuracy                           0.94      1194
   macro avg       0.70      0.85      0.75      1194
weighted avg       0.96      0.94      0.95      1194

[[1078   60]
 [  14   42]]

relu: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      

  'precision', 'predicted', average, warn_for)


**UnderSampling**

In [49]:
xTrainBal, yTrainBal = nr.fit_sample(xTrainNT, yTrainDist.ravel())

printScores(xTrainBal, yTrainBal, xValNT)

identity: 85.51%
              precision    recall  f1-score   support

         0.0       0.98      0.87      0.92      1138
         1.0       0.19      0.64      0.29        56

    accuracy                           0.86      1194
   macro avg       0.59      0.75      0.61      1194
weighted avg       0.94      0.86      0.89      1194

[[985 153]
 [ 20  36]]

logistic: 95.31%
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1138
         1.0       0.00      0.00      0.00        56

    accuracy                           0.95      1194
   macro avg       0.48      0.50      0.49      1194
weighted avg       0.91      0.95      0.93      1194

[[1138    0]
 [  56    0]]



  'precision', 'predicted', average, warn_for)


tanh: 85.43%
              precision    recall  f1-score   support

         0.0       0.98      0.86      0.92      1138
         1.0       0.19      0.66      0.30        56

    accuracy                           0.85      1194
   macro avg       0.59      0.76      0.61      1194
weighted avg       0.94      0.85      0.89      1194

[[983 155]
 [ 19  37]]

relu: 4.69%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      1138
         1.0       0.05      1.00      0.09        56

    accuracy                           0.05      1194
   macro avg       0.02      0.50      0.04      1194
weighted avg       0.00      0.05      0.00      1194

[[   0 1138]
 [   0   56]]



  'precision', 'predicted', average, warn_for)


# Best UnderSampling Performer

In [50]:
# xTrainBal, yTrainBal = nr.fit_sample(xTrainNT, yTrainDist.ravel())

# scale = StandardScaler().fit(xTrainBal)
# xTrainScaled = scale.transform(xTrainBal)
# xTestScaled = scale.transform(xTestNT)

# svc = svm.SVC(kernel="poly", probability=True)
# svc.fit(xTrainScaled, yTrainBal)
# probs = svc.predict_proba(xTestScaled)
# ids = xTestIds.tolist()
# probs = probs.tolist()

# f = open("SVCTake1.txt", "w+")
# f.write("Unique Id,DIST\n")

# for i in range(len(ids)):
#     f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
# f.close()

In [51]:
# xTrainBal, yTrainBal = sm.fit_sample(xTrainNT, yTrainDist.ravel())

# scale = StandardScaler().fit(xTrainBal)
# xTrainScaled = scale.transform(xTrainBal)
# xTestScaled = scale.transform(xTestNT)

# svc = svm.SVC(kernel="poly", probability=True)
# svc.fit(xTrainScaled, yTrainBal)
# probs = svc.predict_proba(xTestScaled)
# ids = xTestIds.tolist()
# probs = probs.tolist()

# f = open("SVCTake2.txt", "w+")
# f.write("Unique Id,DIST\n")

# for i in range(len(ids)):
#     f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
# f.close()

In [52]:
# xTrainBal, yTrainBal = sm.fit_sample(xTrainNT, yTrainDist.ravel())

# scale = StandardScaler().fit(xTrainBal)
# xTrainScaled = scale.transform(xTrainBal)
# xTestScaled = scale.transform(xTestNT)

# svc = svm.SVC(kernel="linear", probability=True)
# svc.fit(xTrainScaled, yTrainBal)
# probs = svc.predict_proba(xTestScaled)
# ids = xTestIds.tolist()
# probs = probs.tolist()

# f = open("SVCTake2Extra.txt", "w+")
# f.write("Unique Id,DIST\n")

# for i in range(len(ids)):
#     f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
# f.close()

In [53]:
# xTrainBal, yTrainBal = sm.fit_sample(xTrainImp, yTrainDist.ravel())
# # Countdown for hidden layers and perceptrons
# scale = StandardScaler().fit(xTrainBal)
# xTrainScaled = scale.transform(xTrainBal)
# xTestScaled = scale.transform(xTestImp)

# clf = MLPClassifier(activation='relu',hidden_layer_sizes=(10,9,8,7,6,5,4,3,2,1), random_state=1, max_iter=2000)
# clf.fit(xTrainScaled, yTrainBal)
# probs = clf.predict_proba(xTestScaled)
# ids = xTestIds.tolist()
# probs = probs.tolist()

# f = open("NeuralNetworkTake1.txt", "w+")
# f.write("Unique Id,DIST\n")

# for i in range(len(ids)):
#     f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
# f.close()
# print("Converged")

In [54]:
# xTrainBal, yTrainBal = sm.fit_sample(xTrainNT, yTrainDist.ravel())
# # This is for...
# scale = StandardScaler().fit(xTrainBal)
# xTrainScaled = scale.transform(xTrainBal)
# xTestScaled = scale.transform(xTestNT)

# clf = MLPClassifier(activation='tanh',hidden_layer_sizes=(10,9,8,7,6,5,4,3,2,1), random_state=1, max_iter=2000)
# clf.fit(xTrainScaled, yTrainBal)
# probs = clf.predict_proba(xTestScaled)
# ids = xTestIds.tolist()
# probs = probs.tolist()

# f = open("NeuralNetworkTake2.txt", "w+")
# f.write("Unique Id,DIST\n")

# for i in range(len(ids)):
#     f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
# f.close()
# print("Converged")

Converged


In [269]:
# xTrainBal, yTrainBal = sm.fit_sample(xTrainImp, yTrainDist.ravel())
# # This is for...
# scale = StandardScaler().fit(xTrainBal)
# xTrainScaled = scale.transform(xTrainBal)
# xTestScaled = scale.transform(xTestImp)

# clf = MLPClassifier(activation='tanh',hidden_layer_sizes=(2), random_state=1, max_iter=2000)
# clf.fit(xTrainScaled, yTrainBal)
# probs = clf.predict_proba(xTestScaled)
# ids = xTestIds.tolist()
# probs = probs.tolist()

# f = open("NeuralNetworkTake3.txt", "w+")
# f.write("Unique Id,DIST\n")

# for i in range(len(ids)):
#     f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
# f.close()
# print("Converged")

Converged


In [144]:
# xTrainBal, yTrainBal = sm.fit_sample(xTrainImp, yTrainDist.ravel())
# # This is for...
# scale = StandardScaler().fit(xTrainBal)
# xTrainScaled = scale.transform(xTrainBal)
# xTestScaled = scale.transform(xTestImp)

# clf = MLPClassifier(activation='logistic',hidden_layer_sizes=(2), random_state=1, max_iter=2000, learning_rate_init=11)
# clf.fit(xTrainScaled, yTrainBal)
# probs = clf.predict_proba(xTestScaled)
# ids = xTestIds.tolist()
# probs = probs.tolist()

# f = open("NeuralNetworkTake4.txt", "w+")
# f.write("Unique Id,DIST\n")

# for i in range(len(ids)):
#     f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
# f.close()
# print("Converged")

Converged


In [24]:
xTrainBal, yTrainBal = sm.fit_sample(xTrainImp, yTrainDist.ravel())
# This is for...
scale = StandardScaler().fit(xTrainBal)
xTrainScaled = scale.transform(xTrainBal)
xTestScaled = scale.transform(xTestImp)

clf = MLPClassifier(activation='logistic',hidden_layer_sizes=(15), random_state=1, max_iter=2000, learning_rate_init=10)
clf.fit(xTrainScaled, yTrainBal)
probs = clf.predict_proba(xTestScaled)
ids = xTestIds.tolist()
probs = probs.tolist()

f = open("NeuralNetworkTake5.txt", "w+")
f.write("Unique Id,DIST\n")

for i in range(len(ids)):
    f.write(str(int(ids[i])) + "," + str(probs[i][1]) + "\n")
    
f.close()
print("Converged")

Converged
