In [10]:
import pandas as pd

In [11]:
df_customers = pd.read_csv("customers.csv")
df_sales = pd.read_csv("sales.csv")

In [12]:
df_sales

Unnamed: 0,customer_id,month,amount,churn_next_month
0,100000,202102,48.0726,0.0
1,100000,202306,40.2995,0.0
2,100000,202308,32.8990,0.0
3,100000,202207,50.7222,0.0
4,100000,202212,47.8690,0.0
...,...,...,...,...
5386170,399999,202004,111.3280,0.0
5386171,399999,202302,0.0000,0.0
5386172,399999,202310,0.0000,0.0
5386173,399999,202210,93.5929,0.0


In [13]:
df_sales = df_sales.dropna(subset='churn_next_month')
df_sales

Unnamed: 0,customer_id,month,amount,churn_next_month
0,100000,202102,48.0726,0.0
1,100000,202306,40.2995,0.0
2,100000,202308,32.8990,0.0
3,100000,202207,50.7222,0.0
4,100000,202212,47.8690,0.0
...,...,...,...,...
5386169,399999,202203,17.6118,0.0
5386170,399999,202004,111.3280,0.0
5386171,399999,202302,0.0000,0.0
5386172,399999,202310,0.0000,0.0


In [14]:
# Remove clients with less than 24 registered sales
customers = df_sales['customer_id'].value_counts()
customers = customers[customers >= 24].index
df_sales = df_sales[df_sales['customer_id'].isin(customers)]

In [15]:
# Filtrando los customer_id donde churn_next_month es igual a 1
churn_customers = df_sales[df_sales['churn_next_month'] == 1]['customer_id']
churn_customers

146        100004
250        100007
704        100037
865        100046
1010       100054
            ...  
5384657    399922
5384839    399930
5385192    399951
5385582    399966
5386066    399987
Name: customer_id, Length: 24746, dtype: int64

In [16]:
# Fusionar los DataFrames en función de customer_id
df = pd.merge(df_customers, df_sales, on="customer_id")

df_grouped = df.sort_values(by='month', inplace=True)

# Convertir la variable amount en columnas
df_grouped = df.groupby('customer_id')['amount'].apply(list).reset_index()


# Determinar la longitud máxima de las listas de amount
max_length = df_grouped['amount'].apply(len).max()

# Realizar el padding al inicio de las filas para que todas tengan la misma longitud
df_grouped['amount'] = df_grouped['amount'].apply(lambda x: [0] * (max_length - len(x)) + x)

# Separar la columna amount en varias columnas
amount_df = pd.DataFrame(df_grouped['amount'].to_list(), columns=[f'amount_{i+1}' for i in range(max_length)])

# Unir los DataFrames resultantes
result_df = pd.concat([df_grouped.drop(columns=['amount']), amount_df], axis=1)

In [17]:
result_df

Unnamed: 0,customer_id,amount_1,amount_2,amount_3,amount_4,amount_5,amount_6,amount_7,amount_8,amount_9,...,amount_39,amount_40,amount_41,amount_42,amount_43,amount_44,amount_45,amount_46,amount_47,amount_48
0,100000,49.0843,45.1530,28.5473,51.2210,70.6396,66.4318,59.0138,90.7214,64.17770,...,26.735078,46.6362,47.563753,18.175400,25.572400,40.299500,30.681900,32.899000,3.522400,8.171900
1,100001,131.2384,128.5816,112.9212,116.7928,122.2274,117.4014,121.4014,164.7714,183.11890,...,67.940100,66.5099,84.713800,50.651800,51.800100,62.251200,62.557500,78.852000,58.700800,53.276400
2,100004,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,276.91930,...,254.545378,236.8439,230.684373,208.593832,171.014900,123.454700,96.027300,140.136600,14.331900,5.530000
3,100005,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.00000,...,195.240100,204.9193,306.376800,231.761000,259.851900,297.605900,312.957300,272.974100,264.353893,199.571800
4,100006,445.4996,421.3517,457.7764,410.2522,509.0340,947.6391,945.1321,786.3095,768.84560,...,891.826637,737.0385,1029.060861,1155.641657,1224.266975,1272.383818,1464.956339,1464.483096,1299.169500,1110.038474
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107335,399982,30.0824,33.7443,28.0285,21.2577,22.0559,32.5173,33.2338,42.2725,40.14720,...,7.483500,10.8298,14.493000,12.248300,12.459600,9.859700,5.854200,7.502600,6.939100,5.635800
107336,399986,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.00000,...,297.041300,284.6913,258.322400,479.959300,580.589900,539.388298,580.148800,593.305550,456.716700,378.706500
107337,399987,192.6681,139.2060,150.4393,172.0419,172.9516,215.3614,292.2377,272.9573,213.35295,...,190.105137,228.8408,296.405694,191.056700,217.197130,270.499378,169.834676,72.558500,2.493800,0.165600
107338,399991,6.6572,3.8746,4.4804,15.2166,26.0712,39.0483,25.6564,22.2324,16.00750,...,10.445800,26.4990,28.462800,24.343000,28.241000,21.125300,30.871800,15.114400,2.198900,11.547300


In [18]:
result_df['churn_next_month'] = 0

# Actualizando la columna 'churn_next_month' a 1 donde el 'customer_id' coincida con los churn_customers
result_df.loc[result_df['customer_id'].isin(churn_customers), 'churn_next_month'] = 1

result_df

Unnamed: 0,customer_id,amount_1,amount_2,amount_3,amount_4,amount_5,amount_6,amount_7,amount_8,amount_9,...,amount_40,amount_41,amount_42,amount_43,amount_44,amount_45,amount_46,amount_47,amount_48,churn_next_month
0,100000,49.0843,45.1530,28.5473,51.2210,70.6396,66.4318,59.0138,90.7214,64.17770,...,46.6362,47.563753,18.175400,25.572400,40.299500,30.681900,32.899000,3.522400,8.171900,0
1,100001,131.2384,128.5816,112.9212,116.7928,122.2274,117.4014,121.4014,164.7714,183.11890,...,66.5099,84.713800,50.651800,51.800100,62.251200,62.557500,78.852000,58.700800,53.276400,0
2,100004,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,276.91930,...,236.8439,230.684373,208.593832,171.014900,123.454700,96.027300,140.136600,14.331900,5.530000,1
3,100005,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.00000,...,204.9193,306.376800,231.761000,259.851900,297.605900,312.957300,272.974100,264.353893,199.571800,0
4,100006,445.4996,421.3517,457.7764,410.2522,509.0340,947.6391,945.1321,786.3095,768.84560,...,737.0385,1029.060861,1155.641657,1224.266975,1272.383818,1464.956339,1464.483096,1299.169500,1110.038474,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107335,399982,30.0824,33.7443,28.0285,21.2577,22.0559,32.5173,33.2338,42.2725,40.14720,...,10.8298,14.493000,12.248300,12.459600,9.859700,5.854200,7.502600,6.939100,5.635800,0
107336,399986,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.00000,...,284.6913,258.322400,479.959300,580.589900,539.388298,580.148800,593.305550,456.716700,378.706500,0
107337,399987,192.6681,139.2060,150.4393,172.0419,172.9516,215.3614,292.2377,272.9573,213.35295,...,228.8408,296.405694,191.056700,217.197130,270.499378,169.834676,72.558500,2.493800,0.165600,1
107338,399991,6.6572,3.8746,4.4804,15.2166,26.0712,39.0483,25.6564,22.2324,16.00750,...,26.4990,28.462800,24.343000,28.241000,21.125300,30.871800,15.114400,2.198900,11.547300,0


In [19]:
# Eliminando la primera y última columna
df_sinID = result_df.iloc[:, 1:-1]
df_sinID

Unnamed: 0,amount_1,amount_2,amount_3,amount_4,amount_5,amount_6,amount_7,amount_8,amount_9,amount_10,...,amount_39,amount_40,amount_41,amount_42,amount_43,amount_44,amount_45,amount_46,amount_47,amount_48
0,49.0843,45.1530,28.5473,51.2210,70.6396,66.4318,59.0138,90.7214,64.17770,37.6583,...,26.735078,46.6362,47.563753,18.175400,25.572400,40.299500,30.681900,32.899000,3.522400,8.171900
1,131.2384,128.5816,112.9212,116.7928,122.2274,117.4014,121.4014,164.7714,183.11890,189.3811,...,67.940100,66.5099,84.713800,50.651800,51.800100,62.251200,62.557500,78.852000,58.700800,53.276400
2,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,276.91930,305.8946,...,254.545378,236.8439,230.684373,208.593832,171.014900,123.454700,96.027300,140.136600,14.331900,5.530000
3,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.00000,33.4624,...,195.240100,204.9193,306.376800,231.761000,259.851900,297.605900,312.957300,272.974100,264.353893,199.571800
4,445.4996,421.3517,457.7764,410.2522,509.0340,947.6391,945.1321,786.3095,768.84560,832.6491,...,891.826637,737.0385,1029.060861,1155.641657,1224.266975,1272.383818,1464.956339,1464.483096,1299.169500,1110.038474
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107335,30.0824,33.7443,28.0285,21.2577,22.0559,32.5173,33.2338,42.2725,40.14720,45.3371,...,7.483500,10.8298,14.493000,12.248300,12.459600,9.859700,5.854200,7.502600,6.939100,5.635800
107336,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.00000,0.0000,...,297.041300,284.6913,258.322400,479.959300,580.589900,539.388298,580.148800,593.305550,456.716700,378.706500
107337,192.6681,139.2060,150.4393,172.0419,172.9516,215.3614,292.2377,272.9573,213.35295,180.2159,...,190.105137,228.8408,296.405694,191.056700,217.197130,270.499378,169.834676,72.558500,2.493800,0.165600
107338,6.6572,3.8746,4.4804,15.2166,26.0712,39.0483,25.6564,22.2324,16.00750,2.9235,...,10.445800,26.4990,28.462800,24.343000,28.241000,21.125300,30.871800,15.114400,2.198900,11.547300


In [20]:
import pandas as pd

# Función para calcular el porcentaje de cambio
def porcentaje_cambio(suma_anterior, suma_actual):
    return ((suma_actual - suma_anterior) / suma_anterior)

# Inicializar una lista para almacenar los resultados
resultados = []

# Iterar sobre cada fila del DataFrame
for index, row in df_sinID.iterrows():
    array = row.values
    cambios = []
    
    # Iterar sobre las ventanas con superposición de 1 elemento
    for i in range(len(array) - 2):
        suma_anterior = array[i] + array[i + 1]
        suma_actual = array[i + 1] + array[i + 2]
        cambio = porcentaje_cambio(suma_anterior, suma_actual)
        cambios.append(cambio)
    
    # Agregar los resultados a la lista de resultados
    resultados.append(cambios)

# Convertir la lista de resultados en un nuevo DataFrame
resultados_df = pd.DataFrame(resultados, columns=[f'change_{i+1}' for i in range(len(resultados[0]))])

print("\nDataFrame con los porcentajes de cambio:")
resultados_df



  return ((suma_actual - suma_anterior) / suma_anterior)
  return ((suma_actual - suma_anterior) / suma_anterior)



DataFrame con los porcentajes de cambio:


Unnamed: 0,change_1,change_2,change_3,change_4,change_5,change_6,change_7,change_8,change_9,change_10,...,change_37,change_38,change_39,change_40,change_41,change_42,change_43,change_44,change_45,change_46
0,-0.217929,0.082333,0.527682,0.124821,-0.084816,0.193627,0.034487,-0.342566,-0.138510,0.533100,...,-0.125152,-0.016525,0.283880,-0.302132,-0.334524,0.505719,0.077567,-0.104260,-0.427164,-0.678917
1,-0.070500,-0.048814,0.040512,0.002546,-0.003447,0.198365,0.215665,0.070740,-0.063157,-0.073689,...,0.174017,0.146315,0.124758,-0.104865,-0.243147,0.113218,0.094321,0.133010,-0.027273,-0.185933
2,,,,,,,inf,1.104634,-0.054558,-0.065028,...,0.199377,0.095853,-0.048558,-0.060424,-0.135835,-0.224281,-0.254653,0.076006,-0.345927,-0.871418
3,,,,,,,,inf,3.523157,0.687503,...,-0.068133,-0.114977,0.277731,0.052497,-0.086455,0.133936,0.095264,-0.040343,-0.082951,-0.136606
4,0.014163,-0.012626,0.059051,0.584570,0.299380,-0.085235,-0.101815,0.029797,-0.067576,-0.149165,...,-0.035618,-0.118718,0.084251,0.237021,0.089351,0.049053,0.096405,0.070177,-0.056593,-0.128252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107335,-0.032179,-0.202138,-0.121182,0.259955,0.204824,0.148366,0.091561,0.037183,-0.001145,-0.139930,...,-0.019770,0.048391,0.382755,0.056017,-0.076040,-0.096674,-0.295950,-0.150001,0.081225,-0.129265
107336,,,,,,,,,,,...,-0.106023,-0.210476,-0.066558,0.359601,0.436510,0.056036,-0.000394,0.048160,-0.105187,-0.204376
107337,-0.127243,0.113366,0.069810,0.125566,0.307191,0.113467,-0.139571,-0.190704,0.054399,0.108448,...,-0.030456,0.116184,0.253733,-0.071936,-0.162492,0.194591,-0.097115,-0.449524,-0.690370,-0.964566
107338,-0.206688,1.357510,1.096147,0.577209,-0.006370,-0.259887,-0.201486,-0.504941,0.300581,0.734880,...,3.762756,1.996261,0.487674,-0.039227,-0.004200,-0.061192,0.053291,-0.115601,-0.623511,-0.206032


In [21]:
import numpy as np

In [22]:
# Reemplazar NaN por 0
resultados_df.fillna(0, inplace=True)

# Reemplazar inf por 1
resultados_df.replace([np.inf, -np.inf], 1, inplace=True)

resultados_df

Unnamed: 0,change_1,change_2,change_3,change_4,change_5,change_6,change_7,change_8,change_9,change_10,...,change_37,change_38,change_39,change_40,change_41,change_42,change_43,change_44,change_45,change_46
0,-0.217929,0.082333,0.527682,0.124821,-0.084816,0.193627,0.034487,-0.342566,-0.138510,0.533100,...,-0.125152,-0.016525,0.283880,-0.302132,-0.334524,0.505719,0.077567,-0.104260,-0.427164,-0.678917
1,-0.070500,-0.048814,0.040512,0.002546,-0.003447,0.198365,0.215665,0.070740,-0.063157,-0.073689,...,0.174017,0.146315,0.124758,-0.104865,-0.243147,0.113218,0.094321,0.133010,-0.027273,-0.185933
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.104634,-0.054558,-0.065028,...,0.199377,0.095853,-0.048558,-0.060424,-0.135835,-0.224281,-0.254653,0.076006,-0.345927,-0.871418
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,3.523157,0.687503,...,-0.068133,-0.114977,0.277731,0.052497,-0.086455,0.133936,0.095264,-0.040343,-0.082951,-0.136606
4,0.014163,-0.012626,0.059051,0.584570,0.299380,-0.085235,-0.101815,0.029797,-0.067576,-0.149165,...,-0.035618,-0.118718,0.084251,0.237021,0.089351,0.049053,0.096405,0.070177,-0.056593,-0.128252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107335,-0.032179,-0.202138,-0.121182,0.259955,0.204824,0.148366,0.091561,0.037183,-0.001145,-0.139930,...,-0.019770,0.048391,0.382755,0.056017,-0.076040,-0.096674,-0.295950,-0.150001,0.081225,-0.129265
107336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,-0.106023,-0.210476,-0.066558,0.359601,0.436510,0.056036,-0.000394,0.048160,-0.105187,-0.204376
107337,-0.127243,0.113366,0.069810,0.125566,0.307191,0.113467,-0.139571,-0.190704,0.054399,0.108448,...,-0.030456,0.116184,0.253733,-0.071936,-0.162492,0.194591,-0.097115,-0.449524,-0.690370,-0.964566
107338,-0.206688,1.357510,1.096147,0.577209,-0.006370,-0.259887,-0.201486,-0.504941,0.300581,0.734880,...,3.762756,1.996261,0.487674,-0.039227,-0.004200,-0.061192,0.053291,-0.115601,-0.623511,-0.206032


In [29]:
birthdeath = pd.read_csv("birthdeath_output.csv", header=None)
birthdeath.columns = ['max_pers', '2_pers', '3_pers']

In [23]:
resultados_df['customer_id'] = result_df['customer_id']
resultados_df['churn_next_month'] = result_df['churn_next_month']
resultados_df

Unnamed: 0,change_1,change_2,change_3,change_4,change_5,change_6,change_7,change_8,change_9,change_10,...,change_39,change_40,change_41,change_42,change_43,change_44,change_45,change_46,customer_id,churn_next_month
0,-0.217929,0.082333,0.527682,0.124821,-0.084816,0.193627,0.034487,-0.342566,-0.138510,0.533100,...,0.283880,-0.302132,-0.334524,0.505719,0.077567,-0.104260,-0.427164,-0.678917,100000,0
1,-0.070500,-0.048814,0.040512,0.002546,-0.003447,0.198365,0.215665,0.070740,-0.063157,-0.073689,...,0.124758,-0.104865,-0.243147,0.113218,0.094321,0.133010,-0.027273,-0.185933,100001,0
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.104634,-0.054558,-0.065028,...,-0.048558,-0.060424,-0.135835,-0.224281,-0.254653,0.076006,-0.345927,-0.871418,100004,1
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,3.523157,0.687503,...,0.277731,0.052497,-0.086455,0.133936,0.095264,-0.040343,-0.082951,-0.136606,100005,0
4,0.014163,-0.012626,0.059051,0.584570,0.299380,-0.085235,-0.101815,0.029797,-0.067576,-0.149165,...,0.084251,0.237021,0.089351,0.049053,0.096405,0.070177,-0.056593,-0.128252,100006,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107335,-0.032179,-0.202138,-0.121182,0.259955,0.204824,0.148366,0.091561,0.037183,-0.001145,-0.139930,...,0.382755,0.056017,-0.076040,-0.096674,-0.295950,-0.150001,0.081225,-0.129265,399982,0
107336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,-0.066558,0.359601,0.436510,0.056036,-0.000394,0.048160,-0.105187,-0.204376,399986,0
107337,-0.127243,0.113366,0.069810,0.125566,0.307191,0.113467,-0.139571,-0.190704,0.054399,0.108448,...,0.253733,-0.071936,-0.162492,0.194591,-0.097115,-0.449524,-0.690370,-0.964566,399987,1
107338,-0.206688,1.357510,1.096147,0.577209,-0.006370,-0.259887,-0.201486,-0.504941,0.300581,0.734880,...,0.487674,-0.039227,-0.004200,-0.061192,0.053291,-0.115601,-0.623511,-0.206032,399991,0


In [30]:
# Append birthdeath to resultados_df, eliminating extra rows
resultados_df = pd.merge(resultados_df, birthdeath, left_index=True, right_index=True)

resultados_df['homology'] = birthdeath['max_pers']

In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

In [32]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_recall_curve, roc_auc_score
from imblearn.over_sampling import SMOTE

df = resultados_df

X = df.drop(['customer_id', 'churn_next_month'], axis=1)
y = df['churn_next_month']

X_resampled, y_resampled = SMOTE().fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

y_pred_prob = rf_model.predict_proba(X_test)[:, 1]

precision, recall, thresholds = precision_recall_curve(y_test, y_pred_prob)
f1_scores = 2 * (precision * recall) / (precision + recall)
best_threshold = thresholds[np.argmax(f1_scores)]

y_pred = (y_pred_prob >= best_threshold).astype(int)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_prob)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"ROC AUC: {roc_auc}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{class_report}")

# Function to loop over the first n rows and print predicted and actual values
def loop_first_n_rows(n):
    first_n_rows = df.head(n)
    X_first_n = first_n_rows.drop(columns=['customer_id', 'churn_next_month'])
    y_first_n = first_n_rows['churn_next_month']
    y_pred_first_n = rf_model.predict(X_first_n)

    correct_predictions = 0
    for i in range(n):
        print(f"Row {i+1}: Predicted: {y_pred_first_n[i]}, Actual: {y_first_n.iloc[i]}")
        if y_pred_first_n[i] == y_first_n.iloc[i]:
            correct_predictions += 1

    accuracy_first_n = correct_predictions / n
    print(f"Accuracy for the first {n} predictions: {accuracy_first_n}")

# Define the number of rows to loop over
n = len(df)  # You can change this value to any number you want
loop_first_n_rows(n)


found 0 physical cores < 1
  File "c:\Users\soalv\miniconda3\envs\SLearn\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


Accuracy: 0.9507839457594285
F1 Score: 0.9520665055126467
Precision: 0.9366046053013166
Recall: 0.9680474791679156
ROC AUC: 0.9885278699445457
Confusion Matrix:
[[15264  1093]
 [  533 16148]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.93      0.95     16357
           1       0.94      0.97      0.95     16681

    accuracy                           0.95     33038
   macro avg       0.95      0.95      0.95     33038
weighted avg       0.95      0.95      0.95     33038

Row 1: Predicted: 0, Actual: 0
Row 2: Predicted: 0, Actual: 0
Row 3: Predicted: 1, Actual: 1
Row 4: Predicted: 0, Actual: 0
Row 5: Predicted: 0, Actual: 0
Row 6: Predicted: 1, Actual: 1
Row 7: Predicted: 0, Actual: 0
Row 8: Predicted: 0, Actual: 0
Row 9: Predicted: 0, Actual: 0
Row 10: Predicted: 0, Actual: 0
Row 11: Predicted: 0, Actual: 0
Row 12: Predicted: 0, Actual: 0
Row 13: Predicted: 0, Actual: 0
Row 14: Predicted: 1, Actual: 1
Row 15: Predicted: 

In [17]:
import xgboost as xgb
import joblib

# Guardar el modelo usando joblib
joblib.dump(rf_model, 'randomforest_rocketmodel.pkl')


['randomforest_rocketmodel.pkl']