In [1]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.discriminant_analysis import StandardScaler
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
def get_neuralNetwork_dataset(input_dir: str, output_dir: str, scaler=None, fit_scaler: bool = True):
    """Load a feature CSV and a target CSV and return scaled features with targets.

    The two files are read with ``pd.read_csv`` and joined on their row index.
    Only the four count features and the ``killed`` target column are kept,
    and every row with a missing value in any of those columns is dropped
    before scaling.

    Args:
        input_dir: Path of the CSV file containing the feature columns
            (despite the name, this is a file path, not a directory).
        output_dir: Path of the CSV file containing the ``killed`` column.
        scaler: Optional scaler object exposing ``fit_transform`` and
            ``transform``. When omitted, a new ``StandardScaler`` is created,
            which is the original behaviour. Pass the same instance for the
            training and the test split so both are scaled with the
            statistics learned on the training data.
        fit_scaler: When True (default) the scaler is fitted on this data
            and then applied to it. When False the scaler is only applied,
            so it must already have been fitted.

    Returns:
        A ``(features, targets)`` tuple: the scaled feature matrix and the
        values of the ``killed`` column, row-aligned with each other.

    Raises:
        ValueError: If ``fit_scaler`` is False and no ``scaler`` is supplied,
            because a freshly created scaler has nothing to transform with.
    """
    # Validate the argument combination before doing any file I/O.
    if scaler is None and not fit_scaler:
        raise ValueError("fit_scaler=False requires an already fitted scaler")

    features_df = pd.read_csv(input_dir)
    targets_df = pd.read_csv(output_dir)
    merged = pd.merge(features_df, targets_df, left_index=True, right_index=True)

    selected_columns = ['n_injured', 'n_arrested', 'n_unharmed', 'n_participants']

    # Chained dropna returns a new frame instead of mutating a slice in place,
    # which avoids pandas' SettingWithCopyWarning.
    merged = merged[selected_columns + ['killed']].dropna()

    features = merged[selected_columns].values
    targets = merged['killed'].values

    # Standardize the features. Fitting happens only when requested so that a
    # scaler fitted on the training split can be reused on the test split.
    if scaler is None:
        scaler = StandardScaler()
    if fit_scaler:
        features = scaler.fit_transform(features)
    else:
        features = scaler.transform(features)

    return features, targets

# Load the training split: scaled feature matrix and the matching `killed` targets.
input, output = get_neuralNetwork_dataset(
    input_dir='data/training/input.csv',
    output_dir='data/training/output.csv',
)

**Cross-validation**

In [3]:
# AdaBoost ensemble of 100 decision trees, each limited to depth 4.
base_estimator = DecisionTreeClassifier(max_depth=4)
clf = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=100,
    random_state=0,
)

# Out-of-fold predictions from 5-fold cross-validation on the training data.
predictions = cross_val_predict(estimator=clf, X=input, y=output, cv=5)

# Build and print the per-class precision/recall/F1 summary.
report = classification_report(y_true=output, y_pred=predictions)
print("Classification Report:\n", report)

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99     93154
           1       0.98      0.99      0.98     34671

    accuracy                           0.99    127825
   macro avg       0.99      0.99      0.99    127825
weighted avg       0.99      0.99      0.99    127825



**Training del miglior modello individuato su tutto il dataset e testing**

In [4]:
# Reload the training split and load the held-out test split.
# NOTE(review): get_neuralNetwork_dataset fits a new StandardScaler on every
# call, so the test features below are standardized with test-set statistics
# rather than with the statistics of the training data the model is fitted on.
input, output = get_neuralNetwork_dataset(
    input_dir='data/training/input.csv',
    output_dir='data/training/output.csv',
)
input_test, output_test = get_neuralNetwork_dataset(
    input_dir='data/testing/input.csv',
    output_dir='data/testing/output.csv',
)

In [5]:
# Same configuration as in the cross-validation cell:
# AdaBoost over 100 decision trees of depth 4.
base_estimator = DecisionTreeClassifier(max_depth=4)
clf = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=100,
    random_state=0,
)

# Fit on the whole training split, then predict the held-out test split.
clf.fit(X=input, y=output)
output_pred = clf.predict(X=input_test)

# Evaluate the test predictions: per-class metrics and the confusion matrix.
report = classification_report(y_true=output_test, y_pred=output_pred)
cm = confusion_matrix(y_true=output_test, y_pred=output_pred)

print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99     23305
           1       0.98      0.99      0.98      8656

    accuracy                           0.99     31961
   macro avg       0.99      0.99      0.99     31961
weighted avg       0.99      0.99      0.99     31961

Confusion Matrix:
 [[23093   212]
 [  108  8548]]
