In [None]:
import pandas as pd
import numpy as np
import utils
import model

from sklearn.metrics import roc_curve, auc
from sklearn import datasets
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Training query: one row per inspection, inner-joined to its establishment and its activity.
# The French evaluation label is mapped to an ordinal score (4 = 'Très satisfaisant' down to
# 1 = 'A corriger de manière urgente'). The CASE has no ELSE branch, so any other label
# (or a missing one) comes back as NULL in synthese_eval.
# Rows are ordered by establishment id.
sqlForTraining = """
select
	eta.idetablissement,
	eta.departement,
	eta.siren,
	eta.geores_lat,
	eta.geores_lon,
	eta.nb_agrements,
	eta.evolution_score,
	act.idactivite,
	act.categorie_frais,
	CASE 
      WHEN ins.synthese_eval = 'Très satisfaisant'  THEN 4
      WHEN ins.synthese_eval = 'Satisfaisant'  THEN 3
      WHEN ins.synthese_eval = 'A améliorer'  THEN 2
      WHEN ins.synthese_eval = 'A corriger de manière urgente'  THEN 1
	END	as synthese_eval
from inspection ins
join etablissement eta on ins.idetablissement = eta.idetablissement
join activite act on ins.idactivite = act.idactivite
order by eta.idetablissement
"""



In [None]:
# Run the training query over the connection obtained from the model session
# and load the result set into a DataFrame.
df = pd.read_sql_query(
    sql=sqlForTraining,
    con=model.session.connection(),
)

# Print column dtypes and non-null counts of the loaded rows.
df.info()

In [None]:
# Feature matrix: the six predictor columns, with categorie_frais cast to integer.
# The cast is chained onto the selection (astype returns a new frame) instead of assigning
# into a frame that was just sliced out of df, which raised SettingWithCopyWarning.
X = df[
    ['geores_lat', 'geores_lon', 'nb_agrements', 'evolution_score', 'idactivite', 'categorie_frais']
].astype({'categorie_frais': 'int'})
# Target: the ordinal evaluation score produced by the CASE expression in the query.
y = df['synthese_eval']

In [None]:
# Tally how many rows carry each distinct target value and print the (values, counts) pair.
classValues, classCounts = np.unique(y, return_counts=True)
print((classValues, classCounts))

In [None]:
# Print the dtypes and non-null counts of the feature frame.
X.info()

In [None]:
from yellowbrick.features import Rank2D

# Pairwise Pearson ranking of the feature columns, drawn by the yellowbrick visualizer.
r2D = Rank2D(algorithm='pearson')
# fit() hands back the visualizer, so the transform call is chained onto it.
r2D.fit(X).transform(X)
r2D.show()

In [None]:
# Correlation coefficients between the feature columns
# (rowvar=False: each column is a variable, each row an observation).
featureCorrelations = np.corrcoef(X, rowvar=False)
print(featureCorrelations)

In [None]:
from yellowbrick.target import FeatureCorrelation
# Score each feature against the target with the 'mutual_info-classification' method, sorted.
fc = FeatureCorrelation(method='mutual_info-classification', sort=True)
# NOTE(review): discrete_features=False is presumably forwarded to the underlying scorer and
# applies to every column, including idactivite and categorie_frais — confirm this is intended.
fc.fit(X, y, discrete_features=False)
fc.show()

In [None]:
# 80% of the rows go to training; the split is stratified on the target and uses a fixed seed.
XTrain, XTest, yTrain, yTest = train_test_split(
    X,
    y,
    train_size=0.8,
    stratify=y,
    random_state=42,
)

In [None]:
# Relative class frequencies of the target in each split: training first, then test.
for targetSplit in (yTrain, yTest):
    print(targetSplit.value_counts(normalize=True))


In [None]:
from sklearn.preprocessing import StandardScaler
# Scaler instance only; it is fitted on the training features in a following cell.
stdSc = StandardScaler()

In [None]:
# Fit the scaler on the training features only, then apply that same scaling to both splits.
stdSc.fit(XTrain)
ZTrain, ZTest = stdSc.transform(XTrain), stdSc.transform(XTest)

In [None]:
from sklearn.linear_model import LogisticRegression

# NOTE(review): the target is multi-class (four CASE levels in the query); confirm the
# installed scikit-learn still accepts the liblinear solver for more than two classes.
# Construct and fit in one step; fit() hands back the fitted estimator.
reg = LogisticRegression(solver='liblinear').fit(ZTrain, yTrain)
# Trailing expression so the cell still displays the fitted estimator.
reg

In [None]:
from yellowbrick.model_selection import FeatureImportances

# A separate, unfitted logistic regression handed to the visualizer (not the `reg` instance).
importanceModel = LogisticRegression(solver='liblinear')
clfFI = FeatureImportances(
    importanceModel,
    labels=XTrain.columns,  # scaled arrays carry no column names, so take them from XTrain
    relative=False,
    topn=6,
)
clfFI.fit(ZTrain, yTrain)
clfFI.show()

In [None]:
from yellowbrick.model_selection import RFECV
from sklearn.metrics import make_scorer, f1_score

# Micro-averaged F1 is the metric used to compare feature subsets across the 3 CV folds.
microF1 = make_scorer(f1_score, average='micro')
clfFE = RFECV(
    LogisticRegression(solver='liblinear'),
    cv=3,
    scoring=microF1,
)
clfFE.fit(ZTrain, yTrain)
clfFE.show()

In [None]:
# Names of the features whose entry in the selector's support mask is False.
# Assumes support_ is a boolean NumPy mask, so negating it selects those entries.
print(XTrain.columns[~clfFE.support_])

In [None]:
from yellowbrick.classifier import ConfusionMatrix
# Wrap the logistic regression fitted in an earlier cell; the visualizer's own fit() is not called here.
clfConfusion = ConfusionMatrix(reg)
# Score on the scaled test split, then render the figure.
clfConfusion.score(ZTest, yTest)
clfConfusion.show()

In [None]:
from yellowbrick.classifier import ClassificationReport
# Wrap the logistic regression fitted in an earlier cell; the visualizer's own fit() is not called here.
clfReport = ClassificationReport(reg)
# Score on the scaled test split, then render the figure.
clfReport.score(ZTest, yTest)
clfReport.show()