# all_models.ipynb 

Here we run our data through several classifiers and compare their training and testing accuracy to see which one performs best.

In [22]:
import pandas as pd
import os
import math
import argparse
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris, load_wine, load_digits, load_breast_cancer, load_diabetes
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings("ignore")

# placeholder for when we actually have data we want to use
sample_df = pd.read_csv('data/CombinedPricePlayerCountHistory/10.csv', encoding='utf-8')

# convert the date column to datetime objects so it can be compared and decomposed
sample_df['Date'] = pd.to_datetime(sample_df['Date'])

# 2/3 train, 1/3 test (integer arithmetic gives the floor directly)
samples = len(sample_df)
split = (samples * 2) // 3

# Split chronologically: everything before the date found 2/3 of the way through
# the timeline is training data, everything on/after it is testing data.
# The date is looked up in the *sorted* dates so the split stays chronological
# even if the CSV rows are not already ordered by date.
split_date = sample_df['Date'].sort_values().iloc[split]
train_data = sample_df[sample_df['Date'] < split_date]
test_data = sample_df[sample_df['Date'] >= split_date]

# 'Finalprice' reveals whether there is a discount (and its amount), so that
# feature is omitted to avoid leaking the target.
feature_columns = ['Date', 'Playercount', 'Initialprice']
target_column = 'Discount'


def add_date_features(features):
    """Replace the 'Date' column with numeric calendar features.

    Datetime values cannot be fed to the classifiers directly, so the date is
    decomposed into 'DayOfWeek', 'Month' and 'Year' and the original column is
    dropped.

    Parameters:
        features: DataFrame containing a datetime 'Date' column.

    Returns:
        A new DataFrame (the input is left unmodified) with the original
        non-date columns followed by 'DayOfWeek', 'Month' and 'Year'.
    """
    dates = features['Date']
    # .assign builds a new frame, so nothing is written into a slice of
    # sample_df (which is what triggers pandas' SettingWithCopyWarning).
    return features.assign(
        DayOfWeek=dates.dt.dayofweek,
        Month=dates.dt.month,
        Year=dates.dt.year,
    ).drop(columns=['Date'])


# Set up training data
X_train = train_data[feature_columns]
y_train = train_data[target_column]

# Set up testing data
X_test = test_data[feature_columns]
y_test = test_data[target_column]

# Show the held-out rows (still with their raw dates) and their labels
print(X_test)
print(y_test)

# convert dates into model-friendly numeric columns and drop the raw date
X_train = add_date_features(X_train)
X_test = add_date_features(X_test)

          Date  Playercount  Initialprice
328 2020-02-29        10612          9.99
329 2020-03-01        10648          9.99
330 2020-03-02         9096          9.99
331 2020-03-03         8934          9.99
332 2020-03-04         8837          9.99
..         ...          ...           ...
488 2020-08-08        10939          9.99
489 2020-08-09        11414          9.99
490 2020-08-10        10516          9.99
491 2020-08-11        10571          9.99
492 2020-08-12        10287          9.99

[165 rows x 3 columns]
328    0
329    0
330    0
331    0
332    0
      ..
488    0
489    0
490    0
491    0
492    0
Name: Discount, Length: 165, dtype: int64


Set up the models

In [23]:
# Fixed seed so the stochastic models (tree, forest, neural net, AdaBoost)
# produce the same accuracies on every Restart & Run All.
SEED = 42

# Display names, in the same order as `classifiers` below.
names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
]

# NOTE(review): the features are fed in unscaled; distance- and kernel-based
# models (KNN, SVC, MLP) are usually sensitive to feature scale, so consider
# standardising the inputs before drawing conclusions from their scores.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5, random_state=SEED),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1, random_state=SEED),
    MLPClassifier(alpha=1, max_iter=1000, random_state=SEED),
    AdaBoostClassifier(random_state=SEED),
    GaussianNB(),
]

# zip() silently truncates to the shorter list, so fail loudly on a mismatch.
assert len(names) == len(classifiers), "each classifier needs exactly one display name"

# Fit every model on the training split and report accuracy on both splits.
for cn, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)

    # Accuracy on the data the model was trained on
    y_predict = clf.predict(X_train)
    train_acc = accuracy_score(y_train, y_predict)

    # Accuracy on the held-out (later-in-time) data
    y_predict = clf.predict(X_test)
    test_acc = accuracy_score(y_test, y_predict)

    print("Name:", cn)
    print("Training:", train_acc)
    print("Testing:", test_acc)
    print("\n")


Name: Nearest Neighbors
Training: 0.9115853658536586
Testing: 0.9151515151515152


Name: Linear SVM
Training: 0.9024390243902439
Testing: 0.30303030303030304


Name: RBF SVM
Training: 1.0
Testing: 0.9151515151515152


Name: Decision Tree
Training: 0.9451219512195121
Testing: 0.5636363636363636


Name: Random Forest
Training: 0.9298780487804879
Testing: 0.9151515151515152


Name: Neural Net
Training: 0.8932926829268293
Testing: 0.9151515151515152


Name: AdaBoost
Training: 0.9451219512195121
Testing: 0.6545454545454545


Name: Naive Bayes
Training: 0.9115853658536586
Testing: 0.9151515151515152


