<a href="https://colab.research.google.com/github/Tommy3072/MineriaDeDatos/blob/main/Taller7.2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegressionCV

# Función para generar gráficos ROC
def _roc_positive_labels(labels, classesNames):
    """Return, per score column, the label value roc_curve should treat as positive.

    Labels are treated as column indices only when every distinct label is a
    valid index (0..n_classes-1) and they are not all members of
    ``classesNames``. In every other case the class values themselves are used,
    which is what is needed when ``classesNames`` holds the label values
    (for example an estimator's ``classes_``).
    """
    class_values = np.asarray(classesNames).tolist()
    seen = set(np.unique(labels).tolist())
    index_values = set(range(len(class_values)))
    if not seen <= set(class_values) and seen <= index_values:
        return list(range(len(class_values)))
    return list(classesNames)


def _finish_roc_figure(title, rocFilenamePrefix):
    """Apply the shared axis setup to the current figure and save it as a PNG."""
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.savefig(rocFilenamePrefix + "-roc.png", pad_inches=5)


def generate_graphs(allPredictions, allLabels, rocFilenamePrefix, classesNames=None, colorsNames=None):
    """Plot ROC curve(s) for a set of class scores and save them to a PNG file.

    With exactly two classes a single curve is drawn, using column 0 of the
    scores as the positive-class score. Otherwise one one-vs-rest curve per
    class is computed, plus their macro average.

    Args:
        allPredictions: 2-D array-like, one row per sample and one score
            column per class.
        allLabels: true label of every sample.
        rocFilenamePrefix: output path prefix; "-roc.png" is appended.
        classesNames: one entry per score column, used for the legend and,
            when the labels are class values rather than column indices, as
            the positive label of each column. Defaults to the column indices.
        colorsNames: colours for the per-class curves. Defaults to four
            colours.

    Returns:
        None. The figure is left open on the current pyplot state and written
        to ``rocFilenamePrefix + "-roc.png"``.
    """
    scores = np.asarray(allPredictions, dtype=float)
    if classesNames is None:
        # The original default crashed on len(None); fall back to column indices.
        classesNames = list(range(scores.shape[1]))
    NUM_CLASSES = len(classesNames)
    pos_labels = _roc_positive_labels(allLabels, classesNames)

    # The raw scores are passed to roc_curve: an increasing affine rescale does
    # not change the curve, and the former min/max rescale divided by zero
    # when all scores were equal.
    if NUM_CLASSES == 2:
        pos_index = 0
        fpr, tpr, _ = roc_curve(allLabels, scores[:, pos_index], pos_label=pos_labels[pos_index])
        auc_score = auc(fpr, tpr)

        plt.figure()
        graphLabel = 'AUC={0:.3f}'.format(auc_score)
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=graphLabel)
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        _finish_roc_figure('Receiver Operating Characteristic ', rocFilenamePrefix)
        return

    # Multiclass case: one-vs-rest curve and AUC for every class column.
    roc_auc, fpr, tpr = dict(), dict(), dict()
    for i in range(NUM_CLASSES):
        fpr[i], tpr[i], _ = roc_curve(allLabels, scores[:, i], pos_label=pos_labels[i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Macro average: interpolate every curve on the union of all FPR points
    # and take the unweighted mean of the TPR values.
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(NUM_CLASSES)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(NUM_CLASSES):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= NUM_CLASSES

    fpr["macro"], tpr["macro"] = all_fpr, mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    plt.figure()
    plt.plot(fpr["macro"], tpr["macro"], label='macro-average (AUC={0:0.3f})'.format(roc_auc["macro"]),
             color='navy', linestyle=':', linewidth=4)

    if colorsNames is None:
        colorsNames = ['aqua', 'darkorange', 'cornflowerblue', 'deeppink']

    # zip stops at the shorter sequence, so only as many per-class curves as
    # there are colours get drawn; the macro average still covers all classes.
    for i, color in zip(range(NUM_CLASSES), colorsNames):
        plt.plot(fpr[i], tpr[i], color=color, lw=2,
                 label='{0} (AUC={1:0.3f})'.format(classesNames[i], roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--', lw=2)
    _finish_roc_figure('Receiver Operating Characteristic', rocFilenamePrefix)

# Load the credit dataset and select the predictor columns.
df = pd.read_csv("credito.csv")
X = df[["balance_control", "duracion_prestamo", "historial_credito", "edad",
        "saldo_ahorro", "longitud_empleo", "tasa_instalacion", "creditos_existentes"]]
# NOTE(review): "monto" is used as the classification target, so every
# distinct value in that column becomes its own class — confirm this is the
# intended label column.
y = df["monto"]

# Split into train (60%), test (20%) and eval (20%). The splits are seeded
# like the models below so that the reported numbers are reproducible.
x_train, x_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, train_size=0.6, random_state=0)
x_test, x_eval, y_test, y_eval = train_test_split(x_temp, y_temp, test_size=0.5, train_size=0.5, random_state=0)

# Train the model.
model = LogisticRegression(solver='liblinear', random_state=0).fit(x_train, y_train)

# Evaluation on the validation (eval) split; predictions are computed once
# and reused for printing, the confusion matrix and the report.
eval_proba = model.predict_proba(x_eval)
eval_pred = model.predict(x_eval)
print(model.classes_)
print(eval_proba)
print(eval_pred)

cm_1 = confusion_matrix(y_eval, eval_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_1, display_labels=model.classes_)
disp.plot(cmap=plt.cm.Blues, values_format='d')
plt.tight_layout()
plt.savefig("lr-eval.png")
print(cm_1)
print(classification_report(y_eval, eval_pred))

# ROC plot for the validation split.
generate_graphs(eval_proba, y_eval, "roc-eval-", classesNames=model.classes_, colorsNames=None)

# Evaluation on the test split.
test_proba = model.predict_proba(x_test)
test_pred = model.predict(x_test)
print(test_proba)
print(test_pred)

cm_2 = confusion_matrix(y_test, test_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_2, display_labels=model.classes_)
disp.plot(cmap=plt.cm.Blues, values_format='d')
plt.tight_layout()
plt.savefig("lr-test.png")
print(cm_2)
print(classification_report(y_test, test_pred))

# ROC plot for the test split.
generate_graphs(test_proba, y_test, "roc-test-", classesNames=model.classes_, colorsNames=None)

# Cross-validated model (5 folds), fitted on the whole dataset.
cv_model = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)
cv_proba = cv_model.predict_proba(X)
print(cv_model.predict(X))
print(cv_proba)
print(cv_model.score(X, y))

# ROC plot for the cross-validated model. This must use cv_model (not the
# earlier train-split model). The scores are for the same rows the model was
# fitted on, so this curve is not a held-out estimate.
generate_graphs(cv_proba, y, "roc-cv-", classesNames=cv_model.classes_, colorsNames=None)


[  250   276   338   339   343   362   368   385   392   409   426   428
   448   454   458   484   518   571   585   601   609   618   629   639
   640   652   654   666   684   701   717   719   727   745   750   754
   759   760   763   766   776   783   790   795   802   804   806   841
   846   860   866   882   886   888   900   909   915   918   926   929
   932   939   958   959   975   976   996  1024  1037  1038  1047  1048
  1049  1055  1056  1068  1076  1092  1098  1101  1107  1113  1123  1126
  1131  1138  1154  1155  1164  1168  1185  1188  1190  1199  1200  1203
  1206  1207  1213  1216  1223  1224  1231  1236  1237  1238  1239  1245
  1249  1255  1258  1262  1264  1271  1274  1275  1278  1282  1283  1287
  1288  1289  1291  1295  1297  1299  1308  1309  1311  1313  1316  1318
  1322  1323  1330  1331  1333  1337  1338  1343  1344  1345  1347  1352
  1355  1360  1361  1364  1374  1376  1377  1382  1386  1388  1391  1393
  1402  1403  1409  1413  1414  1424  1433  1444  1