In [1]:
import pandas as pd
from SupervisedLearningUtils import *

version="1.0"
dataset_folder = "output-datasets"
dataset_name = "all_seasons_merged_mult_feature-selected"
dataset_path = f"{dataset_folder}/{dataset_name}"
results_path = f"output-models/predictions_{dataset_name}_{version}.csv"

curr_points_width = 10
train = read_csv_and_get_inputs_and_labels_classification(f"{dataset_path}-train.csv", points_width=curr_points_width)
test = read_csv_and_get_inputs_and_labels_classification(f"{dataset_path}-test.csv", points_width=curr_points_width)

train_binary = read_csv_and_get_inputs_and_labels_bin_classification(f"{dataset_path}-train.csv", mean=224, width=5)
test_binary = read_csv_and_get_inputs_and_labels_bin_classification(f"{dataset_path}-test.csv", mean=224, width=5)

print(train[0].shape)
print(test[0].shape)

print(train_binary[0].shape)
print(test_binary[0].shape)

predictions = {'truth': test[1]}

(16755, 19)
(7247, 19)
(16755, 19)
(7247, 19)


In [2]:
def execute_model_and_save_results(model, label, train, test):
    train_results, test_results, test_predictions = test_model_classification(model, train, test)
    print(f"{label} Model Results: {train_results, test_results}")
    predictions[label] = test_predictions


# SVM

In [3]:
from sklearn.svm import SVC

svc_model = SVC()
execute_model_and_save_results(svc_model, 'SVC', train, test)

SVC Model Results: (0.26893464637421666, 0.2334759210707879)


In [4]:
from sklearn.svm import SVC

svc_binary_model = SVC(kernel='sigmoid')
execute_model_and_save_results(svc_binary_model, 'SVC_binary', train_binary, test_binary)

SVC_binary Model Results: (0.8353327364965681, 0.8385538843659446)


# Decision Tree

In [5]:
from sklearn.tree import DecisionTreeClassifier

decision_tree = DecisionTreeClassifier()
execute_model_and_save_results(decision_tree, 'decision_tree', train, test)

decision_tree Model Results: (0.997851387645479, 0.16917345108320683)


In [6]:
from sklearn.tree import DecisionTreeClassifier

decision_tree_binary = DecisionTreeClassifier()
execute_model_and_save_results(decision_tree_binary, 'decision_tree_binary', train_binary, test_binary)

decision_tree_binary Model Results: (0.9995822142643986, 0.8265489167931558)


# Random Forest

In [7]:
from sklearn.ensemble import RandomForestClassifier

ideal_params = {'n_estimators': 30, 'min_samples_split': 15, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'max_depth': None}

random_forest = RandomForestClassifier()
execute_model_and_save_results(random_forest, 'random_forest', train, test)

random_forest Model Results: (0.997851387645479, 0.21677935697530012)


In [8]:
from sklearn.ensemble import RandomForestClassifier

ideal_params = {'n_estimators': 30, 'min_samples_split': 15, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'max_depth': None}

random_forest_binary = RandomForestClassifier()
execute_model_and_save_results(random_forest_binary, 'random_forest_binary', train_binary, test_binary)

random_forest_binary Model Results: (0.9995225305878842, 0.9046502000827928)


# Gradient Boosting Classifier

In [9]:
from sklearn.ensemble import GradientBoostingClassifier

gardient_boost_classifier = GradientBoostingClassifier()
execute_model_and_save_results(gardient_boost_classifier, 'gardient_boost_classifier', train, test)

gardient_boost_classifier Model Results: (0.43097582811101165, 0.22823237201600663)


In [10]:
from sklearn.ensemble import GradientBoostingClassifier

gardient_boost_classifier_binary = GradientBoostingClassifier()
execute_model_and_save_results(gardient_boost_classifier_binary, 'gardient_boost_classifier_binary', train_binary, test_binary)

gardient_boost_classifier_binary Model Results: (0.902775290957923, 0.9082378915413274)


# Ada Boost Classifier

In [11]:
from sklearn.ensemble import AdaBoostClassifier

ada_boost_classifier = AdaBoostClassifier()
execute_model_and_save_results(ada_boost_classifier, 'ada_boost_classifier', train, test)

ada_boost_classifier Model Results: (0.18108027454491196, 0.17151924934455637)


In [12]:
from sklearn.ensemble import AdaBoostClassifier

ada_boost_classifier_binary = AdaBoostClassifier()
execute_model_and_save_results(ada_boost_classifier_binary, 'ada_boost_classifier_binary', train_binary, test_binary)

ada_boost_classifier_binary Model Results: (0.8997314234556849, 0.9080999034083069)


# Neural Network

In [13]:
from sklearn.neural_network import MLPClassifier

neural_model = MLPClassifier(max_iter=2500, hidden_layer_sizes=(50, 50, 50, 50, 10))
execute_model_and_save_results(neural_model, 'neural_network', train, test)

neural_network Model Results: (0.4657714115189496, 0.19414930315992823)


In [14]:
from sklearn.neural_network import MLPClassifier

neural_model_binary = MLPClassifier(max_iter=2500, hidden_layer_sizes=(50, 50, 50, 50, 10))
execute_model_and_save_results(neural_model_binary, 'neural_network_binary', train_binary, test_binary)

neural_network_binary Model Results: (0.9668755595344674, 0.8373119911687595)


# Output Results

In [15]:
pd.DataFrame.from_dict(predictions).to_csv(results_path)