In [None]:
import os
import csv
import numpy as np
import pandas as pd
import math
import random
import statistics as stat
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

from datetime import datetime
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.model_selection import cross_val_score

def getCLF(clf_name):
    """Return a new, unfitted classifier selected by its short name.

    Recognised names are 'SGD', 'SVC', 'RFC', 'ETC' and 'GBC'.  Any other
    value yields ``None``.
    """
    # Each entry is a zero-argument factory so that only the requested
    # estimator is ever constructed.
    factories = {
        'SGD': lambda: SGDClassifier(random_state=1, max_iter=10000),
        'SVC': lambda: SVC(),
        'RFC': lambda: RandomForestClassifier(
            n_estimators=300, max_leaf_nodes=64, n_jobs=-1),
        'ETC': lambda: ExtraTreesClassifier(
            n_estimators=300, max_leaf_nodes=32, n_jobs=-1),
        'GBC': lambda: GradientBoostingClassifier(
            max_depth=2, n_estimators=300, learning_rate=1.0),
    }
    factory = factories.get(clf_name)
    if factory is None:
        return None
    return factory()

def readDataFromFiles(path_dat=r'C:\praca\7pix\7pix_X_y'):
    """Load the target array and the eight input arrays from a directory.

    The directory must contain ``y.npy`` and ``x1.npy`` ... ``x8.npy``.

    Parameters
    ----------
    path_dat : str
        Directory holding the ``.npy`` files.

    Returns
    -------
    tuple
        ``(x_dat, y)`` where ``x_dat`` is a list with the contents of
        ``x1.npy`` ... ``x8.npy`` (in that order) and ``y`` is the content
        of ``y.npy``.
    """
    # os.path.join picks the platform's separator, so the loader is not
    # tied to Windows-style paths.
    y = np.load(os.path.join(path_dat, 'y.npy'))
    x_dat = [np.load(os.path.join(path_dat, f'x{i}.npy')) for i in range(1, 9)]
    return x_dat, y

def normalizeData(x_dat_org):
    """Scale every array by its own maximum value.

    Returns a new list in which each element is the corresponding input
    divided by ``np.max`` of that input; the inputs are not modified.
    """
    return [band / np.max(band) for band in x_dat_org]

def getX(individual, x_dat):
    """Transform the band arrays into one 2-D feature array through a filter.

    ``individual`` is a flat sequence of interleaved pairs
    ``q0, d0, q1, d1, ...``.  Band ``k`` passes at a given element when its
    value lies in the half-open interval ``(q_k - d_k, q_k + d_k]``.  Each
    output element is the sum of the weights ``dl[k]`` of the passing bands
    divided by ``0.001 + number of passing bands``.

    Parameters
    ----------
    individual : sequence of float
        Interleaved window centres (even positions) and half-widths
        (odd positions), one pair per band used.
    x_dat : sequence of numpy.ndarray
        2-D band arrays; the output has the shape of ``x_dat[0]`` and only
        that leading region of every band is read.

    Returns
    -------
    numpy.ndarray
        Float array with the shape of ``x_dat[0]``.
    """
    # Per-band weights; presumably band centre wavelengths in nm - TODO confirm.
    dl = [425, 480, 545, 605, 660, 725, 832, 950]
    q = individual[0::2]
    d = individual[1::2]
    X = np.zeros(x_dat[0].shape)
    rows, cols = X.shape
    # The divisor starts at 0.001 so elements where no band passes give
    # 0 / 0.001 == 0 instead of a division by zero.
    licz = np.full(X.shape, 0.001)
    for ind, e in enumerate(q):
        band = x_dat[ind][:rows, :cols]
        # One mask per band replaces the per-element Python loop; upper
        # bound inclusive, lower bound exclusive.
        passes = (band <= e + d[ind]) & (band > e - d[ind])
        X[passes] += dl[ind]
        licz[passes] += 1
    X /= licz
    return X


def crosvalid_strat(clf, X_train, y_train, cv = 3, typ='ACC'):
    """Score ``clf`` with shuffled, stratified k-fold cross-validation.

    For every fold a fresh clone of ``clf`` is fitted on the training part
    and evaluated on the held-out part through ``classification_report``.

    Returns a list with one score per fold: the overall accuracy when
    ``typ == 'ACC'``, the F1 score of the class reported under the key
    ``'1.0'`` when ``typ == 'F1'``, and ``0`` for any other ``typ``.
    """
    splitter = StratifiedKFold(n_splits=cv, shuffle=True)
    fold_scores = []
    for fit_idx, eval_idx in splitter.split(X_train, y_train):
        model = clone(clf)
        model.fit(X_train[fit_idx], y_train[fit_idx])
        predicted = model.predict(X_train[eval_idx])

        report = classification_report(
            y_train[eval_idx], predicted, output_dict=True, zero_division=1)

        if typ == 'ACC':
            score = report['accuracy']
        elif typ == 'F1':
            # NOTE(review): assumes the positive class is reported as '1.0'
            # (float labels) - confirm against the label dtype in use.
            score = report['1.0']['f1-score']
        else:
            score = 0
        fold_scores.append(score)

    return fold_scores

def crosvalid_val(clf, X_train, y_train, cv = 3, typ='ACC'):
    """Score ``clf`` with plain ``cross_val_score`` cross-validation.

    Parameters
    ----------
    clf : estimator
        Classifier handed to ``cross_val_score``.
    X_train, y_train : array-like
        Samples and targets to cross-validate on.
    cv : int
        Passed through as the ``cv`` argument.
    typ : str
        ``'ACC'`` selects the ``'accuracy'`` scorer and ``'F1'`` selects
        ``'f1_micro'``.  Any other value leaves the scoring string empty,
        which is passed to ``cross_val_score`` unchanged.

    Returns
    -------
    The per-fold scores returned by ``cross_val_score``.
    """
    param_fc = ''
    if typ == 'ACC':
        param_fc = 'accuracy'
    elif typ == 'F1':
        param_fc = 'f1_micro'
    # Use the function's own arguments; reading the module-level X and y
    # only worked when those globals happened to exist.
    scores = cross_val_score(clf, X_train, y_train, cv=cv, scoring=param_fc)
    return scores


if __name__ == "__main__":

    # step 1: load the data
    x_dat, y = readDataFromFiles(r'C:\praca\14pix_multi\24_04_2023\X_y')

    # step 2: normalize the data
    x_dat = normalizeData(x_dat)

    # step 3: load the stored filters and re-evaluate each of them
    solution = []
    path_sol = r'C:\praca\result\strategy_I'
    file_list = ['7pix_ACC.csv', '14pix_ACC.csv', '7pix_F1.csv', '14pix_F1.csv']

    for file in file_list:
        df = pd.read_csv(os.path.join(path_sol, file), sep=";")
        print(df)
        for _, row in df.iterrows():
            # Only rows whose algorithm name contains 'GEN' are processed.
            if 'GEN' not in row['ALG']:
                continue

            # 'IND' holds the filter as a bracketed, comma-separated list.
            genes_text = row['IND'].replace('[', '').replace(']', '')
            genes = [float(token) for token in genes_text.split(',')]
            X = getX(genes, x_dat)
            clf = getCLF(row['ALG'].replace('GEN_', ''))

            # NOTE(review): both helpers run with their default typ ('ACC'),
            # also for the *_F1.csv files - confirm that this is intended.
            strat_scores = crosvalid_strat(clf, X, y)
            plain_scores = crosvalid_val(clf, X, y)

            record = {
                'file': file,
                'ALG': row['ALG'],
                'OLD_FIT': row['FIT'],
            }
            for fold, score in enumerate(strat_scores):
                record[f'cv_strat{fold}'] = score
            record['mean_strat'] = stat.mean(strat_scores)

            for fold, score in enumerate(plain_scores):
                record[f'cv_norm{fold}'] = score
            record['mean_norm'] = stat.mean(plain_scores)

            solution.append(record)

    df_sol = pd.DataFrame(solution)
    df_sol.to_csv(os.path.join(path_sol, '_sol_.csv'), index=False, sep=';')
    print(df_sol)
   
