In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.utils import shuffle
# Switch on seaborn's default plot styling for any matplotlib figures drawn later.
sns.set()

# Load Preprocessed Data

In [8]:
# Load the preprocessed season-level table from the parent directory.
# The label column ('PerStatus') is taken from this same frame in the next
# cell, so the former read of 'test_season.csv' into `target` was dead code
# (its value was overwritten before ever being used) and has been removed.
dataset = pd.read_csv(os.path.join(os.pardir, 'p_season.csv'))

In [9]:
# Discard the two columns that are not used as features, then detach the
# label column so that `dataset` holds only the remaining columns and
# `target` holds the 'PerStatus' values.
dataset = dataset.drop(columns=['Unnamed: 0', 'periodQ'])
target = dataset.pop('PerStatus')

In [10]:
# Shuffle features and labels together so that rows stay aligned.
# A fixed random_state makes the row order (and therefore every downstream
# result) reproducible across Restart & Run All.
dataset, target = shuffle(dataset, target, random_state=42)

# RandomForest

In [11]:
from sklearn.ensemble import RandomForestClassifier

# 10-fold cross-validation with shuffling.
# `shuffle` must be passed by keyword: recent scikit-learn releases reject it
# as a positional argument. Both the splitter and the forest are seeded so the
# pooled predictions below are reproducible.
# NOTE(review): the recorded output shows a heavily imbalanced label (796
# positives out of 57728 rows); a stratified splitter may be worth
# considering so every fold contains positives - confirm with the author.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# Out-of-fold predictions and their true labels, pooled over all folds.
predicted = []
expected = []

for train, test in kfold.split(dataset):
    # `train` / `test` are positional row indices, hence .iloc.
    x_train, y_train = dataset.iloc[train], target.iloc[train]
    x_test, y_test = dataset.iloc[test], target.iloc[test]

    # A fresh model per fold so nothing carries over between folds.
    rf = RandomForestClassifier(random_state=42)
    rf.fit(x_train, y_train)

    expected.extend(y_test)
    predicted.extend(rf.predict(x_test))



In [12]:
# Evaluation report on the pooled out-of-fold predictions.

# Headline F1 scores, the per-class report and the confusion matrix.
print('Macro-average: {0}'.format(metrics.f1_score(expected, predicted, average='macro')))
print('Micro-average: {0}'.format(metrics.f1_score(expected, predicted, average='micro')))
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
accuracy = metrics.accuracy_score(expected, predicted)
print('Accuracy: %.2f%%' % (accuracy*100))

# No `average` argument here, so scikit-learn's default averaging applies.
print('\n')
print('precision:', metrics.precision_score(expected, predicted))
print('recall:', metrics.recall_score(expected, predicted))

# Precision / recall / F1 under each averaging scheme.
# The weighted section previously computed recall with average='micro'
# (copy-paste slip); every section now uses its own averaging mode throughout.
for average in ('macro', 'micro', 'weighted'):
    print('Average = {0}'.format(average))
    print('precision:', metrics.precision_score(expected, predicted, average=average))
    print('recall:', metrics.recall_score(expected, predicted, average=average))
    print('F1-score:', metrics.f1_score(expected, predicted, average=average))
    print('\n')

# F-beta with beta=1.5, shown next to the plain F1 score for comparison.
print('Fbeta score:', metrics.fbeta_score(expected, predicted, beta=1.5))
print('F1-score:', metrics.f1_score(expected, predicted))

Macro-average: 0.5002795167084003
Micro-average: 0.9821057372505543
              precision    recall  f1-score   support

           0       0.99      1.00      0.99     56932
           1       0.02      0.01      0.01       796

    accuracy                           0.98     57728
   macro avg       0.50      0.50      0.50     57728
weighted avg       0.97      0.98      0.98     57728

[[56690   242]
 [  791     5]]
Accuracy: 98.21%


precision: 0.020242914979757085
recall: 0.00628140703517588
Average = macro
precision: 0.5032409230524123
recall: 0.5010153610037118
F1-score: 0.5002795167084003


Average = micro
precision: 0.9821057372505543
recall: 0.9821057372505543
F1-score: 0.9821057372505543


Average = weighted
precision: 0.9729190026873653
recall: 0.9821057372505543
F1-score: 0.9774392012167246


Fbeta score: 0.007973503434739942
F1-score: 0.009587727708533078
