# Scripts del Proyecto

### Script 1: Preparación de datos para el entrenamiento

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the raw credit-card transactions table.
# NOTE(review): this path is relative to the working directory, whereas the
# other preparation scripts in this file read from ../data/raw/ -- confirm
# the intended location.
df = pd.read_csv(filepath_or_buffer='creditcardcc.csv')

In [None]:
# Keep only the model inputs (Time, V1..V28, Amount) plus the Class label,
# in that order.
dfp = df[['Time'] + ['V{}'.format(i) for i in range(1, 29)] + ['Amount', 'Class']]

# NOTE(review): to_csv writes the row index as an extra leading column by
# default, and the training script reads this file back without index_col,
# so that column is loaded too -- confirm this is intended.
dfp.to_csv("../data/processed/creditcard_train.csv")

### Script 2: Código de Entrenamiento

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from imblearn.under_sampling import NearMiss
import xgboost as xgb
import pickle

In [None]:
# Load the prepared training table and split it into label and features.
# NOTE(review): the file is read without index_col, so any index column
# written by the preparation step ends up among the features -- confirm.
df = pd.read_csv("../data/processed/creditcard_train.csv")
y_train = df[['Class']]
X_train = df.drop(columns=['Class'])

In [None]:
# Standardize the training features; the scaler's statistics are learned
# from the training table itself.
# NOTE(review): inference inputs need this same fitted transform -- verify
# that whatever is saved for production carries `scaler` with it.
scaler = StandardScaler()
X_train = scaler.fit(X_train).transform(X_train)

In [None]:
# Under-sampler from imbalanced-learn, created with its default settings;
# it is used below to resample the training set.
nm = NearMiss()

In [None]:
# Resample the scaled training features and their labels with NearMiss;
# the classifier below is fitted on X_res / y_res.
X_res, y_res = nm.fit_resample(X_train, y_train)

In [None]:
# Fit the XGBoost classifier on the whole resampled training sample,
# using the fixed hyperparameters below.
xgb_mod = xgb.XGBClassifier(
    n_estimators=1000,
    max_depth=11,
    min_child_weight=0.0,
    gamma=6.917044807116284,
    colsample_bytree=0.7374195520571349,
    reg_alpha=158.0,
    reg_lambda=0.6983089924752687,
)
xgb_mod.fit(X_res, y_res)

In [None]:
# Persist the trained model for use in production.
#
# The classifier was fitted on standardized features, while the validation
# and scoring scripts call .predict() on unscaled tables. The fitted scaler
# is therefore bundled with the classifier so the saved artifact applies the
# same standardization itself. Both steps are already fitted; the Pipeline
# is not re-fitted here, it only chains scaler.transform -> xgb_mod.predict.
from sklearn.pipeline import Pipeline

filename = '../models/best_model.pkl'
inference_model = Pipeline([('scaler', scaler), ('model', xgb_mod)])
# Context manager guarantees the file is flushed and closed after dumping.
with open(filename, 'wb') as model_file:
    pickle.dump(inference_model, model_file)

### Script 3: Preparación de Datos de Validación

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the raw validation table.
df = pd.read_csv(filepath_or_buffer="../data/raw/creditcardcc_new.csv")

In [None]:
# Keep only the model inputs (Time, V1..V28, Amount) plus the Class label,
# in that order.
dfp = df[['Time'] + ['V{}'.format(i) for i in range(1, 29)] + ['Amount', 'Class']]

# NOTE(review): to_csv writes the row index as an extra leading column by
# default, and the validation script reads this file back without index_col,
# so that column is loaded too -- confirm this is intended.
dfp.to_csv("../data/processed/creditcardcc_val.csv")

### Script 4: Código de Validación

In [None]:
import pandas as pd
import xgboost as xgb
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import *

In [None]:
# Load the prepared validation table and split it into label and features.
df = pd.read_csv("../data/processed/creditcardcc_val.csv")
y_test = df[['Class']]
X_test = df.drop(columns=['Class'])

In [None]:
# Load the trained model from disk.
# The file is opened in a context manager so the handle is closed as soon as
# unpickling finishes (a bare open() left it to the garbage collector).
# NOTE: pickle can execute arbitrary code on load -- only load artifacts
# produced by this project's own training script.
filename = '../models/best_model.pkl'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Predict with the trained model on the validation table, excluding the
# Class label column.
# NOTE(review): the training script standardizes features before fitting;
# confirm the loaded artifact applies the same scaling, since none is
# applied here.
y_pred_test = model.predict(df.drop(columns=['Class']))

In [None]:
## Validation metrics
def calc_metrics(y_test,y_pred_test):
    """Print the confusion matrix, accuracy, precision and recall.

    Args:
        y_test: true labels.
        y_pred_test: labels predicted by the model.

    Nothing is returned; the metrics are only written to stdout.
    """
    matrix = confusion_matrix(y_test, y_pred_test)
    print("Matriz de confusion: ")
    print(matrix)
    # Each scalar metric is computed and printed in turn, one per line.
    for label, scorer in (
        ("Accuracy: ", accuracy_score),
        ("Precision: ", precision_score),
        ("Recall: ", recall_score),
    ):
        print(label, scorer(y_test, y_pred_test))

In [None]:
def save_plot(title):
    """Title the current matplotlib figure, save it to disk and clear it.

    The output file name is ``title`` with spaces replaced by underscores
    and lower-cased; no directory or extension is added here.

    Args:
        title: text used both as the plot title and to build the file name.
    """
    plt.title(title)
    current_fig = plt.gcf()
    out_name = title.replace(" ", "_").lower()
    current_fig.savefig(out_name, dpi=500)
    # Reset the figure so the next plot starts from a clean canvas.
    plt.clf()

In [None]:
# Load the trained model from disk (this cell repeats the model load done
# earlier in this script).
# The file is opened in a context manager so the handle is closed as soon as
# unpickling finishes (a bare open() left it to the garbage collector).
filename = '../models/best_model.pkl'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Predict with the trained model on the validation table, excluding the
# Class label column (this cell repeats the prediction done earlier in
# this script).
y_pred_test = model.predict(df.drop(columns=['Class']))

### Script 5: Preparación de Datos de Score (Automatización)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the raw table to be scored.
df = pd.read_csv(filepath_or_buffer="../data/raw/creditcardcc_score.csv")

In [None]:
# Keep only the variables relevant to the model (Time, V1..V28, Amount)
# plus the Class column, in that order.
dfp = df[['Time'] + ['V{}'.format(i) for i in range(1, 29)] + ['Amount', 'Class']]

# NOTE(review): 'Class' is kept in the score table and the row index is
# written as an extra leading column (to_csv default); the scoring script
# reads the file back as-is -- confirm both are intended.
dfp.to_csv("../data/processed/creditcardcc_score.csv")

### Script 6: Código de Scoring (Automatización)

In [None]:
import pandas as pd
import xgboost as xgb
import pickle

In [None]:
# Load the prepared score table.
df = pd.read_csv(filepath_or_buffer="../data/processed/creditcardcc_score.csv")

In [None]:
# Load the trained model from disk.
# The file is opened in a context manager so the handle is closed as soon as
# unpickling finishes (a bare open() left it to the garbage collector).
# NOTE: pickle can execute arbitrary code on load -- only load artifacts
# produced by this project's own training script.
filename = '../models/best_model.pkl'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Predict with the trained model on the score table.
# The score preparation script keeps the 'Class' column, but the model was
# fitted on the table with 'Class' removed. Passing it through would add an
# extra input column and hand the label to the model, so it is dropped here.
# errors='ignore' keeps this working for score files that carry no label.
# The result is reshaped to a single column for export.
# NOTE(review): the training script standardizes features before fitting;
# confirm the loaded artifact applies the same scaling, since none is
# applied here.
scores=model.predict(df.drop(columns=['Class'], errors='ignore')).reshape(-1,1)

In [None]:
# Export the model output so it can be loaded into the Feature Store or the
# models Data Mart: wrap the predictions in a one-column table named
# PREDICT and write it to the scores folder.
df_score = pd.DataFrame(data=scores, columns=['PREDICT'])
df_score.to_csv(path_or_buf='../data/scores/final_score.csv')