In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, date


In [4]:
def crear_features_auctions(auctions_entrenar,datetime_inicio):
    """Aggregate raw auction rows into one feature row per device.

    Parameters
    ----------
    auctions_entrenar : pandas.DataFrame
        Auction rows. The columns 'date', 'device_id' and 'source_id' are
        read; the frame itself is left unmodified.
    datetime_inicio : datetime
        Reference instant; auction times are expressed as whole seconds
        elapsed since it.

    Returns
    -------
    pandas.DataFrame
        One row per device_id with the columns
        's_h20-4', 's_h4-12', 's_h12-20' (number of auctions whose hour falls
        in each range), 's_t_min', 's_t_prom', 's_t_max' (min / mean / max of
        the elapsed seconds), 's_source_id' (mean relative frequency of the
        device's source_ids) and 's_#subastas' (number of auctions).
    """
    dia_en_segundos = 86400

    fechas = pd.to_datetime(auctions_entrenar['date'])
    hora = fechas.dt.hour
    delta = fechas - datetime_inicio

    # Relative frequency of every source_id over the whole input.
    frecuencia_source = auctions_entrenar['source_id'].value_counts(normalize=True)

    # Work on a small private frame so the caller's DataFrame is not mutated.
    filas = auctions_entrenar[['device_id']].copy()
    # Replace each source_id by its relative frequency (plain lookup).
    filas['source_id'] = auctions_entrenar['source_id'].map(frecuencia_source)
    filas['segundos'] = delta.dt.seconds + delta.dt.days*dia_en_segundos
    # Hour-of-day ranges in which the auctions took place.
    filas['4-12'] = (4 <= hora) & (hora < 12)
    filas['12-20'] = (12 <= hora) & (hora < 20)
    filas['20-4'] = (20 <= hora) | (hora < 4)

    # 'size' counts the rows of each device group (the number of auctions).
    agregado = filas.groupby('device_id').agg({
        'source_id': ['mean'],
        'segundos': ['min', 'mean', 'max', 'size'],
        '4-12': ['sum'],
        '12-20': ['sum'],
        '20-4': ['sum'],
    })

    # Name every column from its (source column, aggregation) pair rather than
    # by position, so the labels cannot drift if the aggregation order changes.
    nombres = {
        ('source_id', 'mean'): 's_source_id',
        ('segundos', 'min'): 's_t_min',
        ('segundos', 'mean'): 's_t_prom',
        ('segundos', 'max'): 's_t_max',
        ('segundos', 'size'): 's_#subastas',
        ('4-12', 'sum'): 's_h4-12',
        ('12-20', 'sum'): 's_h12-20',
        ('20-4', 'sum'): 's_h20-4',
    }
    agregado.columns = [nombres[columna] for columna in agregado.columns]

    # Keep the column order of the files written by this notebook.
    orden = ['s_h20-4','s_h4-12','s_t_min','s_t_prom','s_t_max',
             's_source_id','s_h12-20','s_#subastas']
    return agregado[orden].reset_index()

In [5]:
# Elapsed seconds for this file are measured from 18 April 2019.
datetime_inicio = datetime(2019, 4, 18)
auctions_entrenar = pd.read_csv('auctions_1820.csv')

# One feature row per device, written without the positional index.
f_auctions = crear_features_auctions(auctions_entrenar, datetime_inicio)
f_auctions.to_csv('auctions_train2_1820.csv', index=False)

f_auctions.head()

Unnamed: 0,device_id,s_h20-4,s_h4-12,s_t_min,s_t_prom,s_t_max,s_source_id,s_h12-20,s_#subastas
0,41863526108385,17.0,17.0,157228,185636.171429,189019,0.122122,1.0,35
1,135153013040192,0.0,8.0,187854,188381.875,188652,0.508547,0.0,8
2,161514654074162,6.0,0.0,10366,10425.333333,10489,0.249773,0.0,6
3,181891380775191,1.0,0.0,256765,256765.0,256765,0.508547,0.0,1
4,186034136943920,5.0,0.0,60166,160935.142857,257677,0.508547,2.0,7


In [6]:
# Elapsed seconds for this file are measured from 19 April 2019.
datetime_inicio = datetime(2019, 4, 19)
auctions_entrenar = pd.read_csv('auctions_1921.csv')

# One feature row per device, written without the positional index.
f_auctions = crear_features_auctions(auctions_entrenar, datetime_inicio)
f_auctions.to_csv('auctions_train2_1921.csv', index=False)

f_auctions.head()

Unnamed: 0,device_id,s_h20-4,s_h4-12,s_t_min,s_t_prom,s_t_max,s_source_id,s_h12-20,s_#subastas
0,41863526108385,17.0,17.0,70828,99236.171429,102619,0.122257,1.0,35
1,135153013040192,0.0,8.0,101454,101981.875,102252,0.507354,0.0,8
2,181891380775191,1.0,0.0,170365,170365.0,170365,0.507354,0.0,1
3,186034136943920,10.0,5.0,98271,174949.0625,257356,0.507354,1.0,16
4,295841792051458,1.0,0.0,173232,173232.0,173232,0.507354,0.0,1


In [7]:
# Elapsed seconds for this file are measured from 20 April 2019.
datetime_inicio = datetime(2019, 4, 20)
auctions_entrenar = pd.read_csv('auctions_2022.csv')

# One feature row per device, written without the positional index.
f_auctions = crear_features_auctions(auctions_entrenar, datetime_inicio)
f_auctions.to_csv('auctions_train2_2022.csv', index=False)

f_auctions.head()

Unnamed: 0,device_id,s_h20-4,s_h4-12,s_t_min,s_t_prom,s_t_max,s_source_id,s_h12-20,s_#subastas
0,41863526108385,17.0,17.0,10346,13671.705882,16219,0.124705,0.0,34
1,135153013040192,0.0,8.0,15054,15581.875,15852,0.51609,0.0,8
2,181891380775191,1.0,0.0,83965,83965.0,83965,0.51609,0.0,1
3,186034136943920,14.0,30.0,11871,168457.019608,255971,0.51609,7.0,51
4,295841792051458,1.0,0.0,86832,86832.0,86832,0.51609,0.0,1


In [8]:
# Elapsed seconds for this file are measured from 21 April 2019.
datetime_inicio = datetime(2019, 4, 21)
auctions_entrenar = pd.read_csv('auctions_2123.csv')

# One feature row per device, written without the positional index.
f_auctions = crear_features_auctions(auctions_entrenar, datetime_inicio)
f_auctions.to_csv('auctions_train2_2123.csv', index=False)

f_auctions.head()

Unnamed: 0,device_id,s_h20-4,s_h4-12,s_t_min,s_t_prom,s_t_max,s_source_id,s_h12-20,s_#subastas
0,40621409780134,0.0,0.0,226857,226857.0,226857,0.229348,1.0,1
1,168103949904656,1.0,0.0,173044,173044.0,173044,0.152259,0.0,1
2,186034136943920,10.0,32.0,88,122681.220339,222609,0.522137,17.0,59
3,295841792051458,1.0,0.0,432,432.0,432,0.522137,0.0,1
4,345999128501141,24.0,0.0,164703,248794.166667,256365,0.509938,0.0,24


In [9]:
# Elapsed seconds for this file are measured from 24 April 2019.
datetime_inicio = datetime(2019, 4, 24)
auctions_entrenar = pd.read_csv('auctions_2426.csv')

# One feature row per device, written without the positional index.
f_auctions = crear_features_auctions(auctions_entrenar, datetime_inicio)
f_auctions.to_csv('auctions_train2_2426.csv', index=False)

f_auctions.head()

Unnamed: 0,device_id,s_h20-4,s_h4-12,s_t_min,s_t_prom,s_t_max,s_source_id,s_h12-20,s_#subastas
0,69039685746313,0.0,4.0,126258,126455.5,126704,0.027635,0.0,4
1,345999128501141,2.0,0.0,250362,250382.0,250402,0.005048,0.0,2
2,360710529886978,5.0,0.0,46687,162802.071429,256671,0.414065,37.0,42
3,365882020742330,1.0,1.0,96627,143426.0,190225,0.022213,0.0,2
4,416301579449694,28.0,2.0,12569,95799.048387,182084,0.344891,32.0,62


In [10]:
def crear_test_subastas(auctions_validar,datetime_validacion):
    """Return, per device, the seconds elapsed until its earliest auction.

    Parameters
    ----------
    auctions_validar : pandas.DataFrame
        Auction rows. The columns 'date' and 'device_id' are read; the frame
        itself is left unmodified.
    datetime_validacion : datetime
        Reference instant from which the elapsed seconds are measured.

    Returns
    -------
    pandas.DataFrame
        Columns 'device_id' and 'segundos', one row per device, where
        'segundos' is the whole number of seconds between
        datetime_validacion and the device's minimum 'date'.
    """
    dia_en_segundos = 86400

    # Parse into a separate Series so the caller's 'date' column keeps its dtype.
    fechas = pd.to_datetime(auctions_validar['date'])

    # Earliest auction of every device.
    primera_subasta = fechas.groupby(auctions_validar['device_id']).min()

    # days * 86400 + seconds gives the whole-second offset, also for negative deltas.
    delta = primera_subasta - datetime_validacion
    segundos = delta.dt.seconds + delta.dt.days*dia_en_segundos

    return segundos.rename('segundos').reset_index()
    

In [11]:
# Offsets in this file are measured from 21 April 2019.
datetime_validar = datetime(2019, 4, 21)

# Seconds until each device's first auction, computed straight from the raw CSV.
auctions_validar = crear_test_subastas(pd.read_csv('auctions_2123.csv'), datetime_validar)

# NOTE(review): the 2123 input is saved under the 1820 name - presumably it is the
# target for the features built from auctions_1820.csv; confirm.
auctions_validar.to_csv('auctions_test_1820.csv', index=False)

auctions_validar.head()

Unnamed: 0,device_id,segundos
0,40621409780134,226857
1,168103949904656,173044
2,186034136943920,88
3,295841792051458,432
4,345999128501141,164703


In [12]:
# Offsets in this file are measured from 22 April 2019.
datetime_validar = datetime(2019, 4, 22)

# Seconds until each device's first auction, computed straight from the raw CSV.
auctions_validar = crear_test_subastas(pd.read_csv('auctions_2224.csv'), datetime_validar)

# NOTE(review): the 2224 input is saved under the 1921 name - presumably it is the
# target for the features built from auctions_1921.csv; confirm.
auctions_validar.to_csv('auctions_test_1921.csv', index=False)

In [13]:
# Offsets in this file are measured from 23 April 2019.
datetime_validar = datetime(2019, 4, 23)

# Seconds until each device's first auction, computed straight from the raw CSV.
auctions_validar = crear_test_subastas(pd.read_csv('auctions_2325.csv'), datetime_validar)

# NOTE(review): the 2325 input is saved under the 2022 name - presumably it is the
# target for the features built from auctions_2022.csv; confirm.
auctions_validar.to_csv('auctions_test_2022.csv', index=False)

In [14]:
# Offsets in this file are measured from 24 April 2019.
datetime_validar = datetime(2019, 4, 24)

# Seconds until each device's first auction, computed straight from the raw CSV.
auctions_validar = crear_test_subastas(pd.read_csv('auctions_2426.csv'), datetime_validar)

# NOTE(review): the 2426 input is saved under the 2123 name - presumably it is the
# target for the features built from auctions_2123.csv; confirm.
auctions_validar.to_csv('auctions_test_2123.csv', index=False)