In [69]:

from datetime import datetime
from os import mkdir

import numpy as np
import pandas as pd
from scipy.sparse import data
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from joblib import dump,load


In [9]:
class DataHandler:
    """
    Load two CSV files and stack them into a single DataFrame.

    Attributes:
        csvfile1: DataFrame read from the first path.
        csvfile2: DataFrame read from the second path.
        gouped_data: Row-wise concatenation of ``csvfile1`` followed by
            ``csvfile2``. The misspelled name is kept because existing code
            reads it; prefer the ``grouped_data`` alias in new code.
    """

    def __init__(self, csvfile1: str, csvfile2: str):
        """
        Args:
            csvfile1: Path of the first CSV file.
            csvfile2: Path of the second CSV file.
        """
        self.csvfile1 = self.getCsvfile(csvfile1)
        self.csvfile2 = self.getCsvfile(csvfile2)
        # No ignore_index here: each frame keeps its own row labels, so the
        # combined index may contain repeated labels.
        self.gouped_data = pd.concat([self.csvfile1, self.csvfile2])

    @property
    def grouped_data(self) -> pd.DataFrame:
        """Correctly spelled, read-only alias of ``gouped_data``."""
        return self.gouped_data

    def getCsvfile(self, filename: str) -> pd.DataFrame:
        """Read ``filename`` with ``pandas.read_csv`` and return the DataFrame."""
        return pd.read_csv(filename)

# Read both CSV files and concatenate them. This rebinds `data`, shadowing the
# name of the same spelling imported from scipy.sparse in the imports cell.
data = DataHandler(csvfile1="utils/Testing.csv", csvfile2="utils/Training.csv")

In [10]:
class FeatureRecipe:
    """
    Container pairing a DataFrame with flags describing its features.

    Attributes:
        data: The DataFrame handed to the constructor (stored as-is).
        continuous: Value of the ``continus`` argument.
        categorical: Value of the ``type_data`` argument.
        discrete: Logical negation of ``continuous``.
    """

    def __init__(self, data: pd.DataFrame, continus: bool, type_data: bool):
        """
        Args:
            data: DataFrame to keep a reference to.
            continus: Flag stored as ``continuous``.
            type_data: Flag stored as ``categorical``.
        """
        self.data = data
        self.continuous, self.categorical = continus, type_data
        # discrete is always the opposite of continuous
        self.discrete = not self.continuous

# Wrap the concatenated frame; the flags are continus=False, type_data=True.
Feature = FeatureRecipe(data=data.gouped_data, continus=False, type_data=True)
# Preview the first five rows as the cell output.
Feature.data.head(n=5)

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Allergy
2,0,0,0,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,GERD
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Chronic cholestasis
4,1,1,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,Drug Reaction


In [11]:
from sklearn.model_selection import train_test_split

def infosFeature(feat):
    """
    Print the ``X`` and ``y`` attributes of ``feat``, one labelled line each.

    Args:
        feat: Any object exposing ``X`` and ``y`` attributes.
    """
    features_line = f"liste features : {feat.X}"
    print(features_line)
    target_line = f"target : {feat.y}"
    print(target_line)
    
class FeatureExtractor:
    """
    Separate a DataFrame into features and target, then into train/test sets.

    Attributes:
        data: The DataFrame handed to the constructor.
        y: The ``target`` column of ``data``.
        X: ``data`` without the ``target`` column.
        X_train, X_test, y_train, y_test: Result of ``getSplitData(X, y)``.
    """

    def __init__(self, data: pd.DataFrame, target: str,
                 test_size: float = 0.2, random_state=None):
        """
        Args:
            data: Source DataFrame; must contain a column named ``target``.
            target: Name of the column to predict.
            test_size: Forwarded to ``getSplitData`` (default 0.2).
            random_state: Forwarded to ``getSplitData``. The default ``None``
                keeps the split unseeded; pass an int for a reproducible split.

        Raises:
            KeyError: If ``target`` is not a column of ``data``.
        """
        self.data = data
        self.y = data[target]
        self.X = self.getFlist(data, target)
        self.X_train, self.X_test, self.y_train, self.y_test = self.getSplitData(
            self.X, self.y, test_size=test_size, random_state=random_state)

    def getSplitData(self, X, y, test_size: float = 0.2, random_state=None):
        """
        Split features and target with ``sklearn.model_selection.train_test_split``.

        Args:
            X: Feature table.
            y: Target values aligned with ``X``.
            test_size: Passed through as ``test_size``.
            random_state: Passed through as ``random_state``.

        Returns:
            X_train, X_test, y_train, y_test, in the order returned by
            ``train_test_split``.
        """
        return train_test_split(
            X, y, test_size=test_size, random_state=random_state)

    def getFlist(self, data: pd.DataFrame, target: str) -> pd.DataFrame:
        """Return ``data`` with the ``target`` column dropped (features only)."""
        return data.drop(target, axis=1)



In [12]:
# Split the frame into features/target and train/test, predicting "prognosis".
feat = FeatureExtractor(data=Feature.data, target="prognosis")

In [13]:
# Print the feature table and the target column held by `feat`.
infosFeature(feat=feat)

liste features :     itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  shivering  \
0         1          1                     1                    0          0   
1         0          0                     0                    1          1   
2         0          0                     0                    0          0   
3         1          0                     0                    0          0   
4         1          1                     0                    0          0   
..      ...        ...                   ...                  ...        ...   
0         0          0                     0                    0          0   
0         1          0                     0                    0          0   
0         0          0                     0                    0          0   
0         1          0                     0                    0          0   
0         1          0                     0                    0          0   

    chills  joint_pain

In [89]:
class ModelBuilder:
    """
    Train, evaluate, save and load a machine-learning model.

    Attributes:
        save: When falsy, a fresh default pipeline is used. When truthy, the
            model is loaded from ``model_path`` (falling back to the default
            pipeline if that file does not exist).
        model_path: Path of a joblib dump to load the model from.
        model: The estimator returned by ``loadModel()``.
        data: Object exposing ``X_test`` and ``y_test``, read by
            ``printAccuracy``.
    """

    def __init__(self, model_path: str, save: bool, data: "FeatureExtractor"):
        # loadModel() reads self.save and self.model_path, so both must be
        # assigned before it is called.
        self.save = save
        self.model_path = model_path
        self.model = self.loadModel()
        self.data = data

    def __repr__(self):
        """Return a short description; returning None would make repr() raise."""
        return "{}(model_path={!r}, save={!r}, model={})".format(
            type(self).__name__,
            self.model_path,
            self.save,
            type(self.model).__name__,
        )

    def train(self, X, Y):
        """Fit ``self.model`` on features ``X`` and targets ``Y``."""
        self.model.fit(X, Y)

    def predictTest(self, X):  # -> np.ndarray:
        """Return the predictions of ``self.model`` for ``X``."""
        return self.model.predict(X)

    def predictFromDump(self, X) -> np.ndarray:
        """
        Load the model stored at ``self.model_path`` and predict for ``X``.

        The in-memory ``self.model`` is left untouched.

        Raises:
            FileNotFoundError: If ``self.model_path`` does not exist.
        """
        # NOTE(review): joblib dumps are pickles and can execute arbitrary
        # code when loaded; only point model_path at trusted files.
        return load(self.model_path).predict(X)

    def _buildSavePath(self, model_name: str, date=None) -> str:
        """Build '../Model_Save/<model_name>_<day>_<month>_<year>.joblib'."""
        if date is None:
            date = datetime.now()
        # Single underscore between the name and the date (the previous
        # format string produced '<name>__<day>_...').
        return "../Model_Save/{}_{}_{}_{}.joblib".format(
            model_name, date.day, date.month, date.year)

    def saveModel(self, model_name: str):
        """
        Dump ``self.model`` to '../Model_Save/<model_name>_<d>_<m>_<yyyy>.joblib'.

        The target directory is created when missing.
        """
        from os import makedirs  # local import: only needed here

        path = self._buildSavePath(model_name)
        makedirs("../Model_Save", exist_ok=True)
        dump(self.model, path)
        print(f"le model {model_name} à bien été sauvegarder")

    def printAccuracy(self):
        """Print the accuracy (in percent) of ``self.model`` on the test split."""
        # Predictions must come from X_test so they line up with y_test.
        predict = self.predictTest(self.data.X_test)
        print(accuracy_score(self.data.y_test, predict)*100)

    def loadModel(self):
        """
        Return the model to work with.

        Returns:
            The default StandardScaler + SGDClassifier pipeline when
            ``self.save`` is falsy or when ``self.model_path`` does not exist;
            otherwise the object loaded from ``self.model_path``.
        """
        model_default = make_pipeline(
            StandardScaler(), SGDClassifier(max_iter=1000, tol=1e-3))
        if not self.save:
            print("save = False, on charge donc le Model par defaut")
            return model_default
        try:
            # NOTE(review): joblib.load unpickles the file; only load trusted
            # model files.
            return load(self.model_path)
        except FileNotFoundError:
            print(
                f"Erreur, Il n'existe aucun model du nom de {self.model_path}")
            print("Chargement du default model")
            return model_default




In [90]:
# save=True: try to load the dumped model from the given path.
Modelu = ModelBuilder(
    model_path="../Model_Save/model1_model_3_1_2022.joblib",
    save=True,
    data=feat,
)
# save=False: start from the default pipeline; the path is not used.
Modeluu = ModelBuilder(model_path="", save=False, data=feat)




save = False, on charge donc le Model par defaut
