# In [None]:
import os
import csv
import numpy as np
import pandas as pd
import math
import random
from datetime import datetime
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.manifold import LocallyLinearEmbedding

import random
from deap import base
from deap import creator
from deap import tools


# Class holding the global parameters used by the algorithms.
class SETUP:
    """Namespace of global settings shared by the functions in this file.

    The attributes are read and written directly on the class; no instance
    is ever created.
    """

    # Short names of the classifiers handled by getCLF(), and the one
    # currently selected.
    clf_names = [
        'SGD',
        'RFC',
        'SVC',
        'ETC',
        'GBC',
    ]
    clf_name = 'RFC'

    # One value per input array in x_dat; evalOneMax() uses them as the
    # per-array contribution when building features.
    # NOTE(review): presumably spectral band wavelengths - confirm.
    dl = [425, 480, 545, 605, 660, 725, 832, 950]

    # Input arrays and labels; filled in by the script entry point.
    x_dat = None
    y = None

    # Where result CSV files are saved, and the file-name prefix.
    path_to_csv = r'C:\praca\result\7pix\ACC'
    name_of_file = 'sol'

    # Scratch counter: index of the individual most recently seeded by
    # genetic_algorithm().
    nb_of_inv = -1

# Reads the input data from disk.
def readDataFromFiles(path_dat=r'D:\!jud\7pix_X_y'):
    """Load the label array and the eight input arrays from a directory.

    Args:
        path_dat: Directory containing ``y.npy`` and ``x1.npy`` .. ``x8.npy``.

    Returns:
        Tuple ``(x_dat, y)``: ``x_dat`` is the list of arrays loaded from
        ``x1.npy`` .. ``x8.npy`` (in that order) and ``y`` is the array
        loaded from ``y.npy``.
    """
    # os.path.join instead of a hard-coded "\" so the loader also works on
    # systems where the backslash is not a path separator.
    y = np.load(os.path.join(path_dat, 'y.npy'))
    x_dat = [np.load(os.path.join(path_dat, f'x{i}.npy')) for i in range(1, 9)]
    return x_dat, y

# Normalises the data.
def normalizeData(x_dat_org):
    """Scale every array by its own maximum value.

    Args:
        x_dat_org: Iterable of NumPy arrays.

    Returns:
        A new list holding, for each input array, that array divided by its
        ``np.max``. The input arrays are left untouched.
    """
    return [band / np.max(band) for band in x_dat_org]

# Returns the selected classifier.
def getCLF():
    """Build a new, unfitted classifier for the name in ``SETUP.clf_name``.

    Returns:
        A fresh estimator for 'SGD', 'SVC', 'RFC', 'ETC' or 'GBC';
        ``None`` for any other name.
    """
    # Each entry is a zero-argument factory, so only the selected
    # classifier is actually constructed.
    factories = {
        'SGD': lambda: SGDClassifier(random_state=1, max_iter=10000),
        'SVC': lambda: SVC(),
        'RFC': lambda: RandomForestClassifier(
            n_estimators=300, max_leaf_nodes=64, n_jobs=-1),
        'ETC': lambda: ExtraTreesClassifier(
            n_estimators=300, max_leaf_nodes=32, n_jobs=-1),
        'GBC': lambda: GradientBoostingClassifier(
            max_depth=2, n_estimators=300, learning_rate=1.0),
    }
    build = factories.get(SETUP.clf_name)
    if build is None:
        return None
    return build()
        
# Fitness function: evaluates one candidate solution.
def evalOneMax(individual):
    """Score one individual by the hold-out accuracy of the selected classifier.

    The individual is read as interleaved pairs ``(q0, d0, q1, d1, ...)``.
    Pair ``k`` defines the half-open window ``(q_k - d_k, q_k + d_k]`` that is
    applied to ``SETUP.x_dat[k]``. Each element of the feature matrix is the
    sum of ``SETUP.dl[k]`` over all ``k`` whose array value at that position
    lies inside its window, divided by ``0.001 + number of such k``.

    The feature matrix is split with a stratified shuffle split (30 % test),
    the classifier returned by ``getCLF()`` is fitted on the training part and
    its accuracy on the test part is returned.

    Args:
        individual: Sequence of numbers; even positions are window centres,
            odd positions are window half-widths.

    Returns:
        One-element tuple ``(accuracy,)``, matching the single-objective
        fitness used by the genetic algorithm.
    """
    dl = SETUP.dl
    x_dat = SETUP.x_dat
    y = SETUP.y

    centers = individual[0::2]
    half_widths = individual[1::2]

    # Whole-array boolean masks replace the per-element Python loops. The
    # additions happen in the same order per element, so the values are the
    # same as with an element-by-element computation.
    # Assumes every array in x_dat has the shape of x_dat[0].
    X = np.zeros(x_dat[0].shape)
    # Starts at 0.001 so that positions matched by no window do not divide
    # by zero.
    hits = np.full(x_dat[0].shape, 0.001)
    for k, center in enumerate(centers):
        band = x_dat[k]
        inside = (band <= center + half_widths[k]) & (band > center - half_widths[k])
        X[inside] += dl[k]
        hits[inside] += 1
    X /= hits

    # NOTE(review): 42 splits are generated but only the last one is used
    # for training/testing. Kept unchanged so fitness values stay comparable
    # between runs; if a single split was intended, n_splits=1 would do.
    splitter = StratifiedShuffleSplit(n_splits=42, test_size=0.3, random_state=1)
    *_, (train_id, test_id) = splitter.split(X, y)
    X_train, y_train = X[train_id], y[train_id]
    X_test, y_test = X[test_id], y[test_id]

    clf = getCLF()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=1)
    return report['accuracy'],



def setup_genetic_algorithm():
    """Register the DEAP ``FitnessMax`` and ``Individual`` classes.

    ``FitnessMax`` is a single-objective fitness that is maximised
    (``weights=(1.0,)``); ``Individual`` is a ``list`` carrying such a fitness.

    The function can be called more than once (for example when a notebook
    cell is re-run): classes that already exist in ``deap.creator`` are left
    alone instead of being overwritten, which DEAP would report with a
    RuntimeWarning.
    """
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMax)

def genetic_algorithm(nbPop=10, nbGenerations=100, CXPB=0.6, MUTPB=0.05, startPopulations=None):
    """Run a DEAP genetic algorithm starting from a pre-evaluated population.

    ``creator.Individual`` must already exist (see
    ``setup_genetic_algorithm``). The population is seeded with the first
    ``nbPop`` entries stored for ``SETUP.clf_name`` and then evolved with
    tournament selection, two-point crossover and mutation until the best
    fitness reaches 0.99 or ``nbGenerations`` generations have run.

    Args:
        nbPop: Population size.
        nbGenerations: Maximum number of generations.
        CXPB: Probability of mating each consecutive pair of offspring.
        MUTPB: Probability of mutating each offspring.
        startPopulations: Mapping ``classifier name -> sequence of
            (fitness_tuple, genes)``. When ``None``, the module-level
            ``start_population`` is used instead.

    Returns:
        Tuple ``(best_individual, best_fitness, generation)`` where
        ``generation`` is the generation in which the best individual was
        first recorded (0 means it comes from the seeded population).

    Raises:
        NameError: If ``startPopulations`` is ``None`` and no module-level
            ``start_population`` exists.
        IndexError: If fewer than ``nbPop`` seeds are available for the
            selected classifier.
    """
    # Honour the explicit argument; fall back to the module-level dictionary
    # for callers that do not pass one.
    seeds = start_population if startPopulations is None else startPopulations

    # Hall of fame of size one, with the generation of the last improvement
    # stored on it as an extra attribute.
    hof = tools.HallOfFame(maxsize=1)
    hof.b_Gen = 0

    def update_hof(population, nb_Gen):
        previous_best = hof[0].fitness.values[0] if len(hof) > 0 else 0
        hof.update(population)
        if previous_best < hof[0].fitness.values[0]:
            hof.b_Gen = nb_Gen

    toolbox = base.Toolbox()
    toolbox.register("attr_float", random.uniform, 0, 1)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, 16)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", evalOneMax)
    toolbox.register("mate", tools.cxTwoPoint)
    # NOTE(review): mutUniformInt replaces a gene with the integer 0 or 1,
    # although genes are initialised as floats in [0, 1] - confirm that this
    # is the intended mutation.
    toolbox.register("mutate", tools.mutUniformInt, low=0, up=1, indpb=0.3)
    toolbox.register("select", tools.selTournament, tournsize=3)

    # Create the individuals, then overwrite genes and fitness with the
    # stored seeds so they do not have to be evaluated again.
    pop = toolbox.population(n=nbPop)
    SETUP.nb_of_inv = -1
    for idx, ind in enumerate(pop):
        SETUP.nb_of_inv = idx
        seed = seeds[SETUP.clf_name][idx]
        ind.fitness.values = seed[0]
        for j in range(len(ind)):
            ind[j] = seed[1][j]

    fits = [ind.fitness.values[0] for ind in pop]

    # Record the seeded population as generation 0. Without this the hall of
    # fame stays empty when the loop below never runs, and a seed better than
    # every later offspring would be lost.
    if pop:
        update_hof(pop, 0)

    g = 0
    while max(fits) < 0.99 and g < nbGenerations:
        g = g + 1
        print(f'Generation {g}/{nbGenerations}')
        offspring = toolbox.select(pop, len(pop))
        offspring = list(map(toolbox.clone, offspring))

        # Crossover on consecutive pairs; changed individuals lose their fitness.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Only individuals changed by crossover or mutation are re-evaluated.
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        pop[:] = offspring

        fits = [ind.fitness.values[0] for ind in pop]

        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        # abs() guards against a tiny negative variance caused by rounding.
        std = abs(sum2 / length - mean**2)**0.5
        print(f'\t Max:{max(fits):.3f}, Min:{min(fits):.3f}, Avg:{mean:.3f}, Std:{std:.3f}')

        update_hof(pop, g)

    return hof[0], hof[0].fitness.values[0], hof.b_Gen

def random_algorithm(nb_iterations=10):
    """Random search over 16-gene individuals scored with ``evalOneMax``.

    Args:
        nb_iterations: Number of random individuals to generate and evaluate.

    Returns:
        Tuple ``(best_genes, best_fitness, best_iteration, top)``:
        ``best_genes`` is the best individual found (``[]`` if none beat the
        initial fitness of 0), ``best_fitness`` its fitness value,
        ``best_iteration`` the iteration index at which it was found, and
        ``top`` the up to 100 best ``(fitness_tuple, genes)`` pairs, best
        first.
    """
    best_inv = []
    best_fit = 0
    best_ite = 0
    evaluated = []

    for i in range(nb_iterations):
        inv = [random.random() for _ in range(16)]
        fit = evalOneMax(inv)
        evaluated.append((fit, inv))
        print(f'{i/nb_iterations*100:05.1f}% -> Now: {fit[0]:.4f}, Best: {best_fit:.4f}')
        if best_fit < fit[0]:
            best_inv = inv
            best_fit = fit[0]
            best_ite = i

    # Sort once after the loop: re-sorting on every iteration is wasted work,
    # and sorting here keeps the result defined when nb_iterations is 0.
    bests = sorted(evaluated, key=lambda x: x[0], reverse=True)

    return best_inv, best_fit, best_ite, bests[:100]

def addSolution(df, row):
    """Append one result row and save the whole table to a new CSV file.

    The file is written to ``SETUP.path_to_csv`` and named
    ``<SETUP.name_of_file>_<timestamp>.csv``, with ``;`` as the separator.

    Args:
        df: DataFrame collecting the results so far.
        row: Mapping ``column name -> value`` describing the new row.

    Returns:
        A new DataFrame containing the previous rows plus ``row``; the
        DataFrame passed in is not modified.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
    # frame is the supported equivalent.
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f'{SETUP.name_of_file}_{current_datetime}.csv'
    path = os.path.join(SETUP.path_to_csv, file_name)
    df.to_csv(path, index=False, sep=';')
    return df



if __name__ == "__main__":


    # Step 1: load the data.
    x_dat, y = readDataFromFiles(r'C:\praca\7pix\7pix_X_y')

    # Step 2: normalise the data.
    x_dat = normalizeData(x_dat)

    # Step 3: store the data in SETUP, the holder of the global parameters.
    SETUP.x_dat = x_dat
    SETUP.y     = y

    # Step 4: prepare the DataFrame that collects the results.
    df = pd.DataFrame(columns=['ALG', 'FIT', 'ITE', 'IND'])

    # ---------- COMPUTATIONAL EXPERIMENTS ---------
    # start_population has to remain a module-level name: genetic_algorithm()
    # reads it as a global.
    start_population = {}
    setup_genetic_algorithm()

    # Step 5: random search, run once per classifier.
    for clf in SETUP.clf_names:
        SETUP.clf_name = clf
        print(f'RANDOM for {clf}')
        I, F, G, SP = random_algorithm(1000)
        df = addSolution(df, {'ALG':f'RND_{clf}', 'FIT':F, 'ITE':G, 'IND':I})
        # The best random individuals seed the genetic algorithm below.
        start_population[clf] = SP

        # Step 6: genetic algorithm for the same classifier (still inside
        # the loop).
        SETUP.clf_name = clf
        print(f'GENETIC for {clf}')
        I, F, G = genetic_algorithm(100,30,0.65,0.02)
        df = addSolution(df, {'ALG':f'GEN_{clf}', 'FIT':F, 'ITE':G, 'IND':I})
    # ----------------------------------------------

    # Step 7: print the results.
    print(df)







