In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.utils import shuffle
# Apply seaborn's default plot styling to matplotlib figures.
sns.set()

# Load Preprocessed Data

In [2]:
# Read the preprocessed season data from the directory above the notebook.
# The label column ('PerStatus') is split off from this frame in the next
# cell, so no separate label file is loaded here: a `target` read at this
# point would be overwritten before anything used it.
dataset = pd.read_csv(os.path.join(os.pardir, 'p_season.csv'))

In [3]:
# Remove the columns that are not fed to the model, then detach the label
# column so that `dataset` holds only features and `target` holds 'PerStatus'.
# NOTE: this cell mutates `dataset` in place; re-running it without reloading
# the CSV raises KeyError because the columns are already gone.
unused_columns = ['Unnamed: 0', 'periodQ']
dataset.drop(columns=unused_columns, inplace=True)
target = dataset.pop('PerStatus')

(14392, 46)

In [4]:
# Shuffle features and labels together (rows stay aligned). The seed is fixed
# so that a fresh "Restart & Run All" reproduces the same row order.
dataset, target = shuffle(dataset, target, random_state=42)

# RandomForest

In [5]:
from sklearn.ensemble import RandomForestClassifier

# 10-fold cross-validation with shuffling. `shuffle` and `random_state` are
# passed by keyword: current scikit-learn releases accept only `n_splits`
# positionally, so `KFold(10, True)` raises TypeError there. The fixed seed
# makes the fold assignment repeatable.
# NOTE(review): the positive class is rare in this data; StratifiedKFold would
# keep the class ratio equal across folds -- consider switching.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# Out-of-fold true labels and predictions, pooled over all folds so the
# metrics cell can score every row exactly once.
predicted = []
expected = []

for train, test in kfold.split(dataset):
    # `train` / `test` are positional row indices, hence `.iloc`.
    x_train, y_train = dataset.iloc[train], target.iloc[train]
    x_test, y_test = dataset.iloc[test], target.iloc[test]

    # A fresh, seeded forest per fold: nothing fitted on one fold leaks into
    # the next, and repeated runs give the same predictions.
    rf = RandomForestClassifier(random_state=42)
    rf.fit(x_train, y_train)

    expected.extend(y_test)
    predicted.extend(rf.predict(x_test))



In [7]:
# Headline F1 scores, per-class report and confusion matrix for the pooled
# out-of-fold predictions.
print('Macro-average: {0}'.format(metrics.f1_score(expected, predicted, average='macro')))
print('Micro-average: {0}'.format(metrics.f1_score(expected, predicted, average='micro')))
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
accuracy = metrics.accuracy_score(expected, predicted)
print('Accuracy: %.2f%%' % (accuracy*100))

# Default (binary) averaging: scores for the positive class only.
print('\n')
print('precision:', metrics.precision_score(expected, predicted))
print('recall:', metrics.recall_score(expected, predicted))

# Precision / recall / F1 under each averaging scheme. Driving all three
# calls from one loop variable keeps the printed heading and the `average=`
# argument in sync (the weighted section previously computed recall with
# average='micro').
for average in ('macro', 'micro', 'weighted'):
    print('Average = {0}'.format(average))
    print('precision:', metrics.precision_score(expected, predicted, average=average))
    print('recall:', metrics.recall_score(expected, predicted, average=average))
    print('F1-score:', metrics.f1_score(expected, predicted, average=average))
    # Blank separator after each section (also precedes the F-beta lines).
    print('\n')

# F-beta with beta > 1 weights recall more heavily than precision; the plain
# binary F1 is printed next to it for comparison.
print('Fbeta score:', metrics.fbeta_score(expected, predicted, beta=1.5))
print('F1-score:', metrics.f1_score(expected, predicted))

Macro-average: 0.493089148261562
Micro-average: 0.9444135630906059
              precision    recall  f1-score   support

           0       0.95      1.00      0.97     13596
           1       0.38      0.01      0.01       796

    accuracy                           0.94     14392
   macro avg       0.66      0.50      0.49     14392
weighted avg       0.91      0.94      0.92     14392

[[13586    10]
 [  790     6]]
Accuracy: 94.44%


precision: 0.375
recall: 0.007537688442211055
Average = macro
precision: 0.6600236505286589
recall: 0.5034010889989814
F1-score: 0.493089148261562


Average = micro
precision: 0.9444135630906059
recall: 0.9444135630906059
F1-score: 0.9444135630906059


Average = weighted
precision: 0.9135188372134027
recall: 0.9444135630906059
F1-score: 0.9184906585569789


Fbeta score: 0.01079136690647482
F1-score: 0.01477832512315271
