In [1]:
import numpy as np
import pandas as pd
import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

In [3]:
y_train = pd.DataFrame(df_train['No-show'])
y_test = pd.DataFrame(df_test['No-show'])

In [4]:
label_encoder = LabelEncoder()
c_features = ['No-show', 'Gender']

In [5]:
for feature in c_features:
    df_train[feature] = label_encoder.fit_transform(df_train[feature])
for feature in c_features:
    df_test[feature] = label_encoder.fit_transform(df_test[feature])

In [6]:
#Elimino il target e le feature non necessarie
columns_to_be_deleted = ['No-show', 'PatientId', 'AppointmentID', 'Neighbourhood']

df_train.drop(columns_to_be_deleted, axis=1, inplace=True)
df_test.drop(columns_to_be_deleted, axis=1, inplace=True)

In [7]:
#Converto in datatime le features ScheduledDay e AppointmentDay
df_train['ScheduledDay'] = pd.to_datetime(df_train['ScheduledDay'], errors='coerce')
df_train['AppointmentDay'] = pd.to_datetime(df_train['AppointmentDay'], errors='coerce')

df_test['ScheduledDay'] = pd.to_datetime(df_test['ScheduledDay'], errors='coerce')
df_test['AppointmentDay'] = pd.to_datetime(df_test['AppointmentDay'], errors='coerce')

In [8]:
#Estraggo dalla Feature AppointmentDay: Weekday, Month, Year
df_train['Appointment_Weekday'] = df_train.AppointmentDay.dt.dayofweek
df_train['Appointment_Day'] = df_train.AppointmentDay.dt.day
df_train['Appointment_Month'] = df_train.AppointmentDay.dt.month
df_train['Appointment_Year'] = df_train.AppointmentDay.dt.year

df_test['Appointment_Weekday'] = df_test.AppointmentDay.dt.dayofweek
df_test['Appointment_Day'] = df_test.AppointmentDay.dt.day
df_test['Appointment_Month'] = df_test.AppointmentDay.dt.month
df_test['Appointment_Year'] = df_test.AppointmentDay.dt.year

In [9]:
#Calcolo una nuova feature Waiting_Period
df_train['Waiting_Period'] = df_train.AppointmentDay.dt.date - df_train.ScheduledDay.dt.date
df_test['Waiting_Period'] = df_test.AppointmentDay.dt.date - df_test.ScheduledDay.dt.date
df_train['Waiting_Period'] = df_train.Waiting_Period.dt.days
df_test['Waiting_Period'] = df_test.Waiting_Period.dt.days

columns_to_be_deleted = ['ScheduledDay', 'AppointmentDay', ]
df_train.drop(columns_to_be_deleted, axis=1, inplace=True)
df_test.drop(columns_to_be_deleted, axis=1, inplace=True)

In [10]:
#Applico lo scaling alla feature Age
scaler = MinMaxScaler()
df_train['Age'] = scaler.fit_transform(df_train)
df_test['Age'] = scaler.fit_transform(df_test)

In [11]:
model = MLPClassifier()

In [12]:
model.fit(df_train, y_train.values.ravel())

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [13]:
p_train = model.predict(df_train)
p_test = model.predict(df_test)

In [14]:
acc_train = accuracy_score(y_train, p_train)
acc_test = accuracy_score(y_test, p_test)

print(f'Train {acc_train}, Test {acc_test}')

Train 0.7973309206061977, Test 0.7985162399348593
