In [0]:
# ----------------------------------------------------------
import pandas as pd
import numpy as np
from copy import deepcopy
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
# ----------------------------------------------------------
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import confusion_matrix
# ----------------------------------------------------------

# Import the data set from a CSV file hosted on GitHub.
csv_url = 'https://raw.githubusercontent.com/Elrathos/ziwm-project/master/stany_ostrego_brzucha_dane.csv'
X = pd.read_csv(csv_url)
# pop() removes the 'class' column from X and returns it, so X keeps only the features.
y = X.pop('class')

# Best result so far, stored as a list:
# [score, relu (True/False), layerSize, numFeatures, confusionMatrix]
# np.ndarray (the class itself) is only a placeholder until a real confusion matrix is stored.
BestScore = [0, True, 0, 0, np.ndarray]
# One entry per cross-validation fold, appended by classification().
ListScore = []

# --------------------------------------------------------------------------------------------------------------------
# FUNCTION DECLARATIONS
# --------------------------------------------------------------------------------------------------------------------
# function which ranks every feature by its ANOVA F-score using the SelectKBest selector
def selection(X, y, numFeatures):
    """Rank every feature of ``X`` by its ANOVA F-score against ``y``.

    Args:
        X: Feature table; column names are taken from ``X.columns`` when
            that attribute exists, otherwise positional indices are used.
        y: Class labels aligned with the rows of ``X``.
        numFeatures: ``k`` handed to ``SelectKBest``.  The returned table
            still contains one row per input feature.

    Returns:
        DataFrame with a ``Result`` column (the selector's ``scores_``) and
        a ``Feature`` column (feature names), in the original column order.
    """
    # f_classif computes the ANOVA F-value between each feature and the label.
    selector = SelectKBest(f_classif, k=numFeatures).fit(X, y)
    scores = selector.scores_

    # Take the names from X itself rather than from the module-level
    # ``features`` list, so the function does not depend on a global that is
    # defined later in the file and may not match the X that was passed in.
    columns = getattr(X, "columns", None)
    names = list(columns) if columns is not None else list(range(len(scores)))

    pd_ranking = pd.DataFrame(scores, columns=['Result'])
    pd_ranking["Feature"] = names
    return pd_ranking

def classification(X=X, y=y, relu=True, layerSize=100, bestScore=BestScore, numFeatures=5):
    """Evaluate an MLP with repeated stratified 2-fold cross-validation.

    The ``numFeatures`` highest-scoring features (ANOVA F-test through
    ``SelectKBest``) are kept, then one ``MLPClassifier`` is refitted on each
    training split and evaluated on the matching test split.  Every fold
    result is appended to the module-level ``ListScore`` as
    ``[score, relu, layerSize, numFeatures, confusionMatrix]``.

    Args:
        X: Feature table (defaults to the module-level data set).
        y: Class labels aligned with the rows of ``X``.
        relu: If True the classifier keeps its default activation
            (rectified linear unit); otherwise ``'logistic'`` is used.
        layerSize: Value passed as ``hidden_layer_sizes``.
        bestScore: Best result found so far, in the same five-element layout
            as the ``ListScore`` entries.  It is never mutated in place.
        numFeatures: Number of features kept by the selector.

    Returns:
        ``bestScore`` unchanged, or a new five-element list describing the
        fold that achieved a strictly higher accuracy.
    """
    # NOTE(review): the selector is fitted on the whole data set before the
    # cross-validation split, so the test folds influence which features are
    # kept.  Left as-is to keep the reported numbers comparable.
    selector = SelectKBest(f_classif, k=numFeatures)
    X_fit = selector.fit_transform(X, y)

    # split() yields positional indices; a plain array guarantees positional
    # lookup, whereas ``y[train]`` on a Series is a label-based lookup that
    # only works while the index happens to be 0..n-1.
    y_arr = np.asarray(y)

    # 2 splits repeated 5 times -> 10 train/test pairs, reproducible via the seed.
    rkf = RepeatedStratifiedKFold(n_splits=2, n_repeats=5, random_state=432)

    mlp_kwargs = dict(hidden_layer_sizes=layerSize, solver='sgd', max_iter=1000)
    if not relu:
        mlp_kwargs['activation'] = 'logistic'
    mlp = MLPClassifier(**mlp_kwargs)

    for train, test in rkf.split(X_fit, y_arr):
        x_train, x_test = X_fit[train], X_fit[test]
        y_train, y_test = y_arr[train], y_arr[test]

        mlp.fit(x_train, y_train)

        # Predict once and derive both metrics from the same predictions
        # (the classifier's score() is the accuracy of predict()).
        predict = mlp.predict(x_test)
        score = accuracy_score(y_test, predict)
        # Rows are the true classes, columns the predicted classes.
        confusionMatrix = confusion_matrix(y_test, predict)

        ListScore.append([score, relu, layerSize, numFeatures, confusionMatrix])

        # Keep the best fold.  The caller stores the returned value; the old
        # in-function assignment to ``BestScore`` only created a dead local.
        if bestScore[0] < score:
            bestScore = [score, relu, layerSize, numFeatures, confusionMatrix]

    return bestScore
# --------------------------------------------------------------------------------------------------------------------
# END OF FUNCTION DECLARATIONS
# --------------------------------------------------------------------------------------------------------------------

# get a list of the features from the header
features = list(X)

# print the score of every feature, best first, together with its rank
sortedRanking = selection(X, y, 1).sort_values(by="Result", ascending=False)
# number the rows from the actual table length instead of a hard-coded 31,
# so a data set with a different number of columns does not raise
sortedRanking["Position"] = list(range(1, len(sortedRanking) + 1))
print(f"{sortedRanking}\n")



# run MLP with 100, 200 and 500 neurons, with both activation functions
# (True = relu, False = logistic) and with 1 to 8 selected features

layerCount = [100, 200, 500]
function = [True, False]

for layer in layerCount:
    for func in function:
        for i in tqdm(range(1, 9)):
            # classification() returns the running best, which is fed back in
            BestScore = classification(relu=func, layerSize=layer, bestScore=BestScore, numFeatures=i)

print(BestScore)
# one row per cross-validation fold; the target path assumes a mounted
# Google Drive in a Colab session
dflist = pd.DataFrame(ListScore)
dflist.to_csv('/content/drive/My Drive/results.csv', encoding='utf-8', index=False)
