### **Tratamiento de Datos**

In [2]:
import pandas as pd

# Paths to the session and user CSV files of the training split
train_session = 'train/session_train.csv'
train_user = 'train/user_train.csv'

# Paths to the session and user CSV files of the test split
test_session ='test/session_test.csv'
test_user = 'test/user_test.csv'

# Session files are read with the default comma separator
df_session_train = pd.read_csv(train_session)
df_session_test = pd.read_csv(test_session)

# User files are semicolon-separated
df_user_train = pd.read_csv(train_user, sep=';')
df_user_test = pd.read_csv(test_user, sep=';')

In [3]:
# Join each session table with its user table on `user_id`.
# An inner join keeps only the rows whose `user_id` appears in both tables.
df_train = df_session_train.merge(df_user_train, on='user_id', how='inner')
df_test = df_session_test.merge(df_user_test, on='user_id', how='inner')

In [4]:
# Columns removed from both frames before encoding and modelling
columns_to_drop = ['user_id','session_id','timestamp','ip_address','search_query','country']

# Same in-place drop applied to the train and the test frame
for frame in (df_train, df_test):
    frame.drop(columns=columns_to_drop, inplace=True)

In [5]:
from sklearn.preprocessing import OrdinalEncoder
import numpy as np
# Columns converted from category labels to integer codes
categorical_cols = ['device_type', 'browser', 'operating_system', 'abandoned_cart', 'user_category']

# handle_unknown='error': transform() raises if the test frame contains a
# category that was not present when the encoder was fitted.
oe = OrdinalEncoder(handle_unknown='error', dtype=np.int32)

# Learn the category -> integer mapping from the training frame only.
df_train[categorical_cols] = oe.fit_transform(df_train[categorical_cols])

# Reuse the mapping learned on train. Refitting on the test frame (as the
# previous fit_transform call did) can assign a different integer to the same
# category, so the model would see inconsistently encoded features.
df_test[categorical_cols] = oe.transform(df_test[categorical_cols])

In [6]:
# Persist the processed frames as CSV, without writing the index column
for frame, csv_path in ((df_train, 'SessionUserTrain.csv'), (df_test, 'SessionUserTest.csv')):
    frame.to_csv(csv_path, index=False)

### **Entrenamiento del Modelo**

In [7]:
# Features are every column except the last one; the target is the last column
X, y = df_train.iloc[:, :-1], df_train.iloc[:, -1]

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with trees limited to depth 21.
# random_state is fixed so that the fitted model, the validation score and the
# saved model file are reproducible on a fresh kernel run; without it every
# run trains a different forest.
model = RandomForestClassifier(max_depth=21, random_state=42)
results = model.fit(X_train, y_train)

# Mean accuracy on the held-out validation rows
print(model.score(X_val, y_val))

0.784156976744186


In [10]:
import joblib

# Serialize the fitted model next to the data; `ruta_drive` is expected to be
# defined by an earlier cell (it is not set in the cells shown here).
joblib.dump(value=model, filename=ruta_drive  + 'model.pkl')

['/content/drive/MyDrive/NUWE/nuwe-data-ds1/model.pkl']

### **Predicción del Modelo**

In [13]:
# Keep the identifiers aside and build the feature frame under a new name.
# `df_test` is left untouched so this cell can be re-run: overwriting
# `df_test` without `test_id` made a second execution fail with a KeyError.
test_ids = df_test['test_id']
X_test = df_test.drop(columns='test_id')

predictions = model.predict(X_test)

# Map each test_id to its predicted label (pairs are matched by position)
predictions_dict = dict(zip(test_ids, predictions))

{216: 1, 135: 1, 235: 1, 179: 1, 293: 1, 266: 1, 156: 1, 151: 1, 222: 1, 110: 1, 181: 1, 150: 1, 188: 1, 260: 1, 169: 1, 177: 1, 295: 1, 220: 1, 105: 1, 286: 1, 228: 1, 119: 1, 144: 1, 284: 1, 300: 1, 213: 1, 296: 1, 61: 1, 291: 1, 27: 1, 17: 1, 172: 1, 38: 1, 281: 1, 155: 1, 168: 1, 20: 1, 8: 1, 7: 1, 180: 1, 108: 1, 269: 1, 205: 1, 240: 1, 112: 1, 28: 1, 16: 1, 184: 1, 254: 1, 272: 1, 22: 1, 154: 1, 204: 1, 29: 1, 221: 1, 128: 1, 289: 1, 186: 1, 134: 1, 45: 1, 86: 1, 158: 1, 63: 1, 70: 1, 121: 1, 234: 1, 106: 1, 249: 1, 31: 1, 196: 1, 265: 1, 79: 1, 57: 1, 203: 2, 297: 1, 239: 1, 270: 1, 24: 1, 83: 1, 162: 1, 25: 3, 194: 1, 279: 1, 267: 1, 236: 1, 73: 1, 15: 1, 288: 1, 190: 2, 229: 1, 277: 1, 232: 1, 90: 1, 13: 1, 78: 1, 227: 1, 285: 1, 67: 1, 84: 1, 161: 1, 183: 1, 85: 1, 273: 2, 54: 1, 238: 1, 215: 1, 165: 1, 129: 2, 131: 1, 26: 1, 208: 1, 40: 1, 148: 1, 75: 1, 275: 1, 32: 1, 163: 1, 225: 1, 299: 1, 30: 1, 59: 1, 226: 1, 199: 2, 287: 1, 268: 1, 125: 1, 198: 1, 14: 1, 290: 1, 170: 1

In [17]:
import json
# Convert keys to str and values to built-in int before serializing
predictions_dict = {
    str(test_id): int(label)
    for test_id, label in predictions_dict.items()
}

# The output file holds a single top-level "target" object
predictions_json = {"target": predictions_dict}

with open('predictions.json', 'w') as file:
    file.write(json.dumps(predictions_json, indent=4))