# Features por sesiones


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import inline as inline

%matplotlib inline

pd.set_option('display.width', 400)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 200)

plt.style.use('default') # haciendo los graficos un poco mas bonitos en matplotlib
#plt.rcParams['figure.figsize'] = (20, 10)

sns.set(style="whitegrid") # seteando tipo de grid en seaborn

df_sesiones = pd.read_csv('datos_por_sesion.csv')
pd.options.mode.chained_assignment = None

## Feature con eventos get_dummies

Me quedo solo con los campos que voy a utilizar

In [2]:
eventos_por_sesion = df_sesiones[["person","event","sessionid"]]

Uso get_dummies para crear features con la columna categorica "event"

In [3]:
sesiones = pd.get_dummies(eventos_por_sesion["event"])

In [4]:
eventos_dummies = eventos_por_sesion.merge(sesiones,left_index=True, right_index=True)

In [7]:
eventos_dummies.head()

Unnamed: 0,person,event,sessionid,ad campaign hit,brand listing,checkout,conversion,generic listing,lead,search engine hit,searched products,staticpage,viewed product,visited site
0,7dab1178,viewed product,2,0,0,0,0,0,0,0,0,0,1,0
1,2cbd12ad,viewed product,4,0,0,0,0,0,0,0,0,0,1,0
2,4ba8900f,viewed product,4,0,0,0,0,0,0,0,0,0,1,0
3,2cbd12ad,viewed product,4,0,0,0,0,0,0,0,0,0,1,0
4,754cd2b0,visited site,5,0,0,0,0,0,0,0,0,0,0,1


In [6]:
eventos_dummies_por_sesion = eventos_dummies.groupby(["sessionid"]).agg({'person':'max','ad campaign hit':'sum','brand listing':'sum','checkout':'sum','conversion':'sum','generic listing':'sum','lead':'sum','search engine hit':'sum','searched products':'sum','staticpage':'sum','viewed product':'sum','visited site':'sum'})

In [9]:
eventos_dummies_por_sesion = eventos_dummies_por_sesion.reset_index()

In [11]:
eventos_dummies_por_sesion = eventos_dummies_por_sesion.drop(["sessionid"],axis=1)

In [13]:
eventos_dummies_por_sesion = eventos_dummies_por_sesion.set_index('person').reset_index()

In [14]:
eventos_dummies_por_sesion.head()

Unnamed: 0,person,ad campaign hit,brand listing,visited site,searched products,staticpage,conversion,viewed product,lead,search engine hit,generic listing,checkout
0,7dab1178,0,0,0,0,0,0,1,0,0,0,0
1,4ba8900f,0,0,0,0,0,0,3,0,0,0,0
2,754cd2b0,0,0,1,0,0,0,5,0,0,0,0
3,89d96b41,1,2,0,0,0,0,1,0,1,0,0
4,e96ee59c,1,0,0,0,0,0,1,0,0,1,0


In [15]:
eventos_dummies_por_sesion.to_csv("eventos_sesion.csv")

## Feature por modelo con mas de 5 interacciones en la sesion

In [69]:
modelos_por_sesion = df_sesiones[["person","model","sessionid"]]

In [70]:
dfprueba = modelos_por_sesion.groupby(["person","model","sessionid"]).size().reset_index()

In [71]:
modelos_por_sesion= dfprueba.sort_values(by=0,ascending = False).drop_duplicates(subset='sessionid',keep='first')

In [72]:
modelos_por_sesion=modelos_por_sesion.rename(columns={0:'interacciones'})

In [73]:
modelos_por_sesion=modelos_por_sesion.set_index("sessionid").reset_index()

In [74]:
modelos_por_sesion["esNulo"]=modelos_por_sesion["model"]=='0'

In [75]:
#modelos_por_sesion=modelos_por_sesion.loc[modelos_por_sesion['interacciones'] > 5]

In [76]:
modelos_por_sesion=modelos_por_sesion.loc[modelos_por_sesion['esNulo']==False]

In [77]:
modelos_por_sesion=modelos_por_sesion.drop(["esNulo","interacciones"],axis=1)

In [78]:
modelos_por_sesion.head()

Unnamed: 0,sessionid,person,model
0,94740,d0a809fe,Samsung Galaxy S8 Plus
3,10570,1dea6f79,Motorola Moto X2
4,20536,ae92f2a6,iPhone 6S Plus
5,15883,150d41f6,iPhone 5c
7,1626,26858144,iPhone 6


In [79]:
modelos_dummies = pd.get_dummies(modelos_por_sesion["model"])

In [80]:
modelos_dummies.shape

(73033, 170)

In [81]:
modelos_por_sesion_con_dummies=modelos_por_sesion.merge(modelos_dummies,left_index=True,right_index=True)

In [82]:
modelos_por_sesion_con_dummies=modelos_por_sesion_con_dummies.drop(["model","sessionid"],axis=1)

In [83]:
modelos_por_sesion_con_dummies.head()

Unnamed: 0,person,Asus Zenfone 3 Max 32 GB,Asus Zenfone 3 Max 16 GB,LG X Screen,LG G3 Beat D724,LG G3 D855,LG G3 Stylus D690,LG G4 Beat H736,LG G4 H815P,LG G4 H818P,LG G4 Stylus H630,LG G4 Stylus HDTV H540T,LG G5 SE,LG K10,LG K10 Novo,LG K10 TV,LG K4,LG K8,LG L Prime D337,LG Nexus 5 D821,LG Prime Plus H522,LG X Power,Lenovo Vibe A7010 Dual Chip,Lenovo Vibe K5,Motorola Moto E2 3G Dual,...,iPad Air 2 Wi-Fi,iPad Air 2 Wi-Fi + 4G,iPad Air Wi-Fi,iPad Air Wi-Fi + 4G,iPad Mini 2 Wi-Fi,iPad Mini 3 Wi-Fi,iPad Mini 3 Wi-Fi + 4G,iPad Mini 4 Wi-Fi,iPad Mini Wi-Fi,iPad Mini Wi-Fi + 4G,iPhone 4G,iPhone 4S,iPhone 5,iPhone 5c,iPhone 5s,iPhone 6,iPhone 6 Plus,iPhone 6S,iPhone 6S Plus,iPhone 7,iPhone 7 Plus,iPhone 8,iPhone 8 Plus,iPhone SE,iPhone X
0,d0a809fe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1dea6f79,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,ae92f2a6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
5,150d41f6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
7,26858144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [84]:
modelos_por_sesion_con_dummies.to_csv("interacciones_por_modelo.csv")