# Running some tests: Ridge regression baseline on date-derived features

In [131]:
import pandas as pd
import numpy as np
import datetime

# Read at most the first 10,000 rows of the semicolon-separated training file.
df = pd.read_csv(
    'data/train.csv',
    sep=';',
    nrows=10000,
)

# Add a parsed copy of the 'DATE' column under the lowercase name 'date';
# the original 'DATE' column is left untouched.
df['date'] = pd.to_datetime(df['DATE'], infer_datetime_format=True)

# Regression target.
y = df['CSPL_CALLS']

### Parsing date

In [95]:
if False:
    # Disabled scratch code: prints the current timestamp followed by each of
    # its individual components, one value per line.
    now = datetime.datetime.now()
    components = (
        now,               # full timestamp
        now.day,           # day of the month
        now.month,         # month number
        now.year,          # year
        now.isoweekday(),  # ISO weekday number (Monday=1 ... Sunday=7), not a 0/1 flag
        now.hour,          # hour
        now.minute,        # minutes
        now.second,        # seconds
    )
    for component in components:
        print(component)

In [96]:
def is_working_day(d):
    """Return True when ``d`` falls on Monday through Friday.

    ``d`` must provide ``isoweekday()`` (Monday=1 ... Sunday=7), as
    ``datetime.datetime`` does. Only the weekday is inspected; public
    holidays are not taken into account.
    """
    iso_day = d.isoweekday()
    return iso_day <= 5

In [97]:
def trimester(d):
    """Return the zero-based three-month period of ``d.month``.

    January-March -> 0, April-June -> 1, July-September -> 2,
    October-December -> 3.
    """
    months_elapsed = d.month - 1
    period, _ = divmod(months_elapsed, 3)
    return period

In [98]:
def is_day_shift(d):
    """Return True when the time of day of ``d`` is strictly between 07:30 and 23:30.

    Only ``d.hour`` and ``d.minute`` are read, so seconds are ignored.
    Both bounds are exclusive: any time within the minutes 07:30 or 23:30
    yields False.
    """
    shift_start = 7 * 60 + 30    # 07:30 expressed in minutes after midnight
    shift_end = 23 * 60 + 30     # 23:30 expressed in minutes after midnight
    minutes_since_midnight = d.hour * 60 + d.minute
    return shift_start < minutes_since_midnight < shift_end

In [99]:
def evolution_over_years(d):
    """Return how many three-month periods separate ``d`` from the start of 2011.

    The value is ``(d.year - 2011) * 4 + trimester(d)``, so any date in
    January-March 2011 gives 0 and the count grows by one per three-month
    period. Dates before 2011 give negative values.
    """
    base_year = 2011
    years_elapsed = d.year - base_year
    return years_elapsed * 4 + trimester(d)

In [90]:
# Work on an explicit copy: later cells assign new columns into `data`, and
# doing that on a bare selection of `df` triggers pandas'
# SettingWithCopyWarning and leaves it unclear which frame is modified.
data = df[['DATE', 'ASS_ASSIGNMENT']].copy()

In [100]:
# Replace the raw 'DATE' values held in `data` with parsed pandas timestamps.
raw_date_values = df['DATE']
data['DATE'] = pd.to_datetime(raw_date_values, infer_datetime_format=True)

In [106]:
# Derive calendar features from the parsed 'DATE' column of `data`.
# The column is looked up once and reused for every feature.
timestamps = data['DATE']

# Plain date/time components come from the vectorized `.dt` accessor rather
# than one Python-level loop per column.
# NOTE: depending on the pandas version these integer columns may be int32
# instead of int64; the values are the same.
data['year'] = timestamps.dt.year
data['month'] = timestamps.dt.month
data['day'] = timestamps.dt.day
print("Year, month, day ok.")

data['hour'] = timestamps.dt.hour
data['minute'] = timestamps.dt.minute
data['second'] = timestamps.dt.second
print("Hour, minute, second ok.")

# Features with their own helper function are applied element-wise through
# `Series.map`, so each rule stays defined in exactly one place.
data['working'] = timestamps.map(is_working_day)
print("Working day ok.")

data['shift'] = timestamps.map(is_day_shift)
print("Shift ok.")

data['trimester'] = timestamps.map(trimester)
print("Trimester ok.")

data['evolution'] = timestamps.map(evolution_over_years)
print("Evolution ok.")

Year, month, day ok.
Hour, minute, second ok.
Working day ok.
Shift ok.
Trimester ok.
Evolution ok.


In [122]:
# Integer code for each assignment label, numbered from 0 in the order listed.
# Presumably these are the values of the 'ASS_ASSIGNMENT' column; the mapping
# is not referenced by any other visible cell.
# NOTE(review): "RTC " carries a trailing space; confirm it matches the raw data.
_assignment_labels = [
    "CMS",
    "Crises",
    "Domicile",
    "Gestion",
    "Gestion - Accueil Téléphonique",
    "Gestion Assurances",
    "Gestion Relation Clienteles",
    "Gestion Renault",
    "Japon",
    "Médical",
    "Nuit",
    "RENAULT",
    "Regulation Medicale",
    "SAP",
    "Services",
    "Tech. Axa",
    "Tech. Inter",
    "Téléphonie",
    "RTC ",
]
map_assignment = {label: code for code, label in enumerate(_assignment_labels)}

# One-hot encode the assignment label: one indicator column per distinct value.
assignment_column = data['ASS_ASSIGNMENT']
d = pd.get_dummies(assignment_column)

In [151]:
# Feature matrix: every column of `data` side by side with the one-hot
# columns in `d`, minus the raw 'DATE' and 'ASS_ASSIGNMENT' columns.
new_data = (
    pd.concat([data, d], axis=1)
    .drop(['DATE', 'ASS_ASSIGNMENT'], axis=1)
)

# Extracting testing dataset

# Implementing regressor

In [152]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The shuffle is seeded so that the
# split, and every score computed from it, is the same on each run.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    new_data, y, test_size=0.2, random_state=SPLIT_SEED
)

# Print the shape of each part of the split, in the order
# X_train, X_test, y_train, y_test.
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.shape)

(8000, 43)
(2000, 43)
(8000,)
(2000,)


In [153]:
from sklearn.linear_model import Ridge

# Ridge regression created with the constructor's default arguments.
S = Ridge()
# The fit call stays as the cell's last expression so that the notebook
# displays the fitted estimator.
S.fit(X=X_train, y=y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [155]:
# Predictions of the fitted model for the held-out rows.
y2 = S.predict(X=X_test)

In [171]:
import error_functions as ef

# Convert the held-out targets to a NumPy array before scoring.
y_test = np.array(y_test)
# Score the predictions with the project's `linex_loss`; as the cell's last
# expression, its return value is what the notebook displays.
# NOTE(review): presumably an asymmetric linear-exponential loss, in which
# case the argument order (predictions first, targets second) matters —
# confirm against error_functions.linex_loss.
ef.linex_loss(y2, y_test)


27.51909702264831