# Machine Learning


In [15]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score, f1_score, roc_auc_score, roc_curve

Se carga el archivo csv

In [16]:
df = pd.read_csv('balanced_data.csv')

In [17]:
# Se dividen los datos en conjuntos de entrenamiento y prueba
X = df.drop(columns='isFraud')
y = df['isFraud']

# Se dividen los datos en conjunto de prueba y entrenamiento
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

### Regresión logistica

In [18]:
# Se crea y entrena el modelo
model = LogisticRegression(max_iter=200)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Se calcula la presición del modelo
accuracy_regresion = accuracy_score(y_test, y_pred)
precision_regresion = precision_score(y_test, y_pred)
recall_regresion = recall_score(y_test, y_pred)
f1_score_regresion = f1_score(y_test, y_pred)
auc_roc_regresion = roc_auc_score(y_test, y_pred)
print("Accuracy del modelo:", accuracy_regresion)
print("Precisión del modelo:", precision_regresion)


Accuracy del modelo: 0.9002556129656295
Precisión del modelo: 0.9032393116221639


### Arboles de decisión

In [19]:
from sklearn.tree import DecisionTreeClassifier

# Se crea el modelo y se entrena
model = DecisionTreeClassifier()
model.fit(x_train, y_train)

# Predicciones
y_pred = model.predict(x_test)

# Calcular la precisión del modelo
accuracy_arbol = accuracy_score(y_test, y_pred)
precision_arbol = precision_score(y_test, y_pred)
recall_arbol = recall_score(y_test, y_pred)
f1_score_arbol = f1_score(y_test, y_pred)
auc_roc_arbol = roc_auc_score(y_test, y_pred)
print("Accuracy del modelo:", accuracy_arbol)
print("Precisión del modelo:", precision_arbol)

Accuracy del modelo: 0.9919244882959044
Precisión del modelo: 0.984141652010612


### Random Forest

In [20]:
from sklearn.ensemble import RandomForestClassifier
# Se inicializa y entrena el modelo
model = RandomForestClassifier(max_depth=30, n_estimators=12, random_state=42)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
accuracy_random_forest = accuracy_score(y_test, y_pred)
precision_random_forest = precision_score(y_test, y_pred)
recall_random_forest= recall_score(y_test, y_pred)
f1_score_random_forest = f1_score(y_test, y_pred)
auc_roc_random_forest = roc_auc_score(y_test, y_pred)
print("Accuracy del modelo:", accuracy_random_forest)
print("Precisión del modelo:", precision_random_forest)

Accuracy del modelo: 0.9862254530948159
Precisión del modelo: 0.9788109927879197


### Modelo Naive Bayes

In [21]:
from sklearn.naive_bayes import GaussianNB

# se instacia el modelo
model = GaussianNB()

# se etrena el modelo
model.fit(x_train, y_train)

# realizamos predicciones sobre el conjunto de prueba
y_pred = model.predict(x_test)

# calcular la precisión del modelo
accuracy_naive = accuracy_score(y_test, y_pred)
precision_naive = precision_score(y_test, y_pred)
recall_naive = recall_score(y_test, y_pred)
f1_score_naive = f1_score(y_test, y_pred)
auc_roc_naive = roc_auc_score(y_test, y_pred)

print("Accuracy del modelo:", accuracy_naive)
print("Precisión del modelo:", precision_naive)

Accuracy del modelo: 0.7322095561815712
Precisión del modelo: 0.5899556513189549


Creamos una lista para guardar cada metrica  de cada modelo


In [40]:
nombre_modelo = ['Regresión Logistica', 'Arból de Decisión', 'Random Forest', 'Naive Bayes']
accuracy = [accuracy_regresion, accuracy_arbol, accuracy_random_forest, accuracy_naive]
precision = [precision_regresion, precision_arbol, precision_random_forest, precision_naive]
recall = [recall_regresion, recall_arbol, recall_random_forest, recall_naive]
f1Score = [f1_score_regresion, f1_score_arbol, f1_score_random_forest, f1_score_naive]
auc_roc = [auc_roc_regresion, auc_roc_arbol, auc_roc_random_forest, auc_roc_naive]


Guardamos en un diccionario las listas anteriores

In [45]:
resultados = {
    'Nombre_modelo':nombre_modelo,
    'Accuracy': accuracy,
    'Precision':precision,
    'Recall': recall,
    'F1-score': f1Score,
    'AUC_ROC': auc_roc
}

Se crea un DataFrame a partir del diccionario

In [44]:
df_metricas = pd.DataFrame(resultados).set_index('Nombre_modelo')
df_metricas

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1-score,AUC_ROC
Nombre_modelo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Regresión Logistica,0.900256,0.903239,0.830035,0.865091,0.887151
Arból de Decisión,0.991924,0.984142,0.995075,0.989578,0.992512
Random Forest,0.986225,0.978811,0.985584,0.982186,0.986106
Naive Bayes,0.73221,0.589956,1.0,0.742103,0.782183
