# ML tests - Neural Network

## Libraries

In [1]:
# Add the project's `application` directory to the module search path so the local
# `functions` package can be imported (the path is relative to the working directory).
import sys
sys.path.append('../../application')

# NOTE(review): wildcard import - `prepareInputForClassifier`, used further down, is not
# defined in this notebook and presumably comes from here; consider importing it by name.
from functions.machine_learning_model import *

import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.neural_network import MLPClassifier

print("Libraries Imported")

Libraries Imported


## Get the appalti_aggiudicatari dataset

In [2]:
# Read the semicolon-separated CSV into a DataFrame and report its dimensions
df = pd.read_csv("appalti_aggiudicatari_dummified.csv", sep=";")
row_count, col_count = df.shape
print("\nTotal rows:", row_count)
print("Total cols:", col_count)


Total rows: 1514852
Total cols: 95


In [3]:
# Parse the publication-date column into pandas datetimes so that it can be
# compared against the date bounds used for the train/test split
publication_dates = pd.to_datetime(df['data_pubblicazione'])
df['data_pubblicazione'] = publication_dates

## Training and Test set

- *Training*: the two months preceding the test month (October and November 2020)
- *Test*: the last month (December 2020)

In [4]:
# Select the training window: October and November 2020
# (the two months that precede the December test month).
start_date = "2020-10-01"
end_date = "2020-11-30"  # last day of the window, inclusive

# Compare against the day *after* end_date with a strict "<": a plain "<= end_date"
# stops at midnight and would drop rows carrying a time of day on the last day.
end_exclusive = pd.Timestamp(end_date) + pd.Timedelta(days=1)
mask = (df['data_pubblicazione'] >= start_date) & (df['data_pubblicazione'] < end_exclusive)

training_df = df.loc[mask]
# NOTE(review): reset_index() keeps the old row index as a new "index" column. The
# feature preview printed further down starts with a row-id-like value, so this column
# may be reaching the classifier as a feature. Confirm what prepareInputForClassifier
# expects before switching to reset_index(drop=True).
training_df = training_df.reset_index()
print("\nTotal rows for training:", training_df.shape[0])


Total rows for training: 18316


In [5]:
# Select December 2020, the held-out month used as the test set.
start_date = "2020-12-01"
end_date = "2020-12-31"  # last day of the window, inclusive

# Compare against the day *after* end_date with a strict "<": a plain "<= end_date"
# stops at midnight and would drop rows carrying a time of day on the last day.
end_exclusive = pd.Timestamp(end_date) + pd.Timedelta(days=1)
mask = (df['data_pubblicazione'] >= start_date) & (df['data_pubblicazione'] < end_exclusive)

test_df = df.loc[mask]
# NOTE(review): reset_index() keeps the old row index as a new "index" column; confirm
# whether prepareInputForClassifier expects it before switching to
# reset_index(drop=True).
test_df = test_df.reset_index()
print("\nTotal rows for testing:", test_df.shape[0])


Total rows for testing: 7177


## Neural Network

In [6]:
# Build the classifier input from the training frame.
# Below, tr_input[0] is passed to fit() as the samples and tr_input[1] as the targets.
tr_input = prepareInputForClassifier(training_df)

# Show the first two samples and their targets as a sanity check
features_preview = tr_input[0][:2]
labels_preview = tr_input[1][:2]
print("\n\n", features_preview)
print("\n", labels_preview)

Preparing data for KNN: 100%|██████████| 18316/18316 [00:06<00:00, 2970.48it/s]

 [[1489359, 70000.0, 1.0, 70000.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [1489360, 13000000.0, 1.0, 13000000.0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]

 ['ENDRESS + HAUSER ITALIA SPA', 'MSD ITALIA S.R.L.']



In [7]:
# Create and fit the classifier.
# A fixed seed makes weight initialisation and batch shuffling repeatable, so that
# Restart & Run All reproduces the same model and the same scores.
RANDOM_SEED = 42
snn_classifier = MLPClassifier(random_state=RANDOM_SEED)

# NOTE(review): the preview of tr_input printed above shows raw magnitudes (e.g.
# 13000000.0) next to 0/1 dummy columns. scikit-learn documents MLPs as sensitive to
# feature scaling - consider standardising the inputs before fitting.
snn_classifier.fit(tr_input[0], tr_input[1])



MLPClassifier()

#### Evaluate the model

- with the classifier's `score` method (mean accuracy on the test set)
- by manually counting how many predictions match the true labels

In [None]:
# Prepare the test input: ts_input[0] holds the samples, ts_input[1] the true labels
ts_input = prepareInputForClassifier(test_df)
true_output = ts_input[1]
# Predict with the network fitted in this notebook. The previous reference to
# `knn_classifier` pointed at a model that is never created here.
pred_output = snn_classifier.predict(ts_input[0])

In [None]:
# Predict a label for every test sample with the fitted network
test_features = ts_input[0]
snn_predictions = snn_classifier.predict(test_features)

In [None]:
# Mean accuracy on the test set.
# For a classifier, score() returns the fraction of correctly predicted labels,
# not R^2 (R^2 is what regressors report). sample_weight defaults to None.
snn_classifier.score(ts_input[0], ts_input[1])

In [None]:
# Count by hand how many predicted labels are equal to the true ones
total = len(snn_predictions)
matches = sum(1 for idx in range(total) if true_output[idx] == snn_predictions[idx])

print("Matches:", matches)
print("Total:", total)