In [1]:
import pandas as pd
import numpy as np
from joblib import load
import py7zr
from sklearn.metrics import confusion_matrix, roc_auc_score, f1_score
from sklearn.model_selection import cross_validate
import plotly.express as px

In [2]:
#datos train: original_train
#datos test: telecom_churn_me/test/

with py7zr.SevenZipFile('../../../data/telecom_churn_me/original_train.7z', mode='r') as z:
    z.extractall(path='original_train/')
with py7zr.SevenZipFile('../../../data/telecom_churn_me/test/x_test.7z', mode='r') as z:
    z.extractall(path='x_test/')
with py7zr.SevenZipFile('../../../data/telecom_churn_me/test/y_test.7z', mode='r') as z:
    z.extractall(path='y_test/')


In [3]:
train = pd.read_csv('./original_train/original_train.csv')
x_train = train.drop("TARGET", axis=1)
y_train = train.TARGET

x_test = pd.read_csv('./x_test/x_test.csv')
y_test = pd.read_csv('./y_test/y_test.csv')

In [4]:
#load models
mlp = load("../joblibs/telecom_churn_me/model/classification-model.joblib")
logistic_regression = load("../joblibs/telecom_churn_me/model/logistic-regression-model.joblib")

models = [
	{"name": "mlp", "joblib": mlp},
	{"name": "logistic regression", "joblib": logistic_regression}
]
metrics = []

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
for model in models:
	#train test
	y_pred = model["joblib"].predict(x_test)
	train_score = model["joblib"].score(x_train, y_train)
	test_score = model["joblib"].score(x_test, y_test)
	#confusion matrix
	cm = confusion_matrix(y_pred, y_test)
	cm = pd.DataFrame(cm)
	#cross validation
	cross_validation = cross_validate(estimator=model["joblib"], X=x_test, y=y_test.values.ravel(), cv=3, scoring=['accuracy', 'precision'])
	cross_validation_accuracy = cross_validation["test_accuracy"]
	#roc_auc_curve
	roc_auc = roc_auc_score(y_test, y_pred)
	#f1
	f1 = f1_score(y_test, y_pred)

	metrics_by_model = {
		"name": model["name"],
		"train_score": train_score,
		"test_score": test_score,
		"cm": cm,
		"cross_validation": cross_validation["test_accuracy"],
		"roc_auc": roc_auc,
		"f1_score": f1
	}
	metrics.append(metrics_by_model)



In [6]:

for metric in metrics:
	print(metric)

{'name': 'mlp', 'train_score': 0.9586710302684166, 'test_score': 0.9561430938596824, 'cm':         0      1
0  323890  15007
1       0   3284, 'cross_validation': array([0.87498795, 0.92118183, 0.95753112]), 'roc_auc': 0.5897709255918211, 'f1_score': 0.30442641946697563}
{'name': 'logistic regression', 'train_score': 0.9476317691248874, 'test_score': 0.9460665554195002, 'cm':         0      1
0  323685  18250
1     205     41, 'cross_validation': array([0.87498795, 0.92118183, 0.95753112]), 'roc_auc': 0.5008043043108154, 'f1_score': 0.004423585261908615}
