In [6]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.utils import shuffle
# Switch matplotlib over to seaborn's default styling for any figures drawn later.
sns.set()

# Load Preprocessed Data

In [7]:
# Load the preprocessed table from the parent directory. It still contains the
# label column ('PerStatus'), which the next cell splits off into `target`.
#
# The earlier read of 'test_season.csv' into `target` was removed: that value
# was overwritten by `dataset.pop('PerStatus')` before it was ever used, so the
# read was dead I/O and an unnecessary dependency on a second file.
dataset = pd.read_csv(os.path.join(os.pardir, 'p_season.csv'))

In [8]:
# Remove the columns that are not used as features. 'Unnamed: 0' is presumably
# the index column written out by an earlier to_csv call -- TODO confirm.
dataset = dataset.drop(columns=['Unnamed: 0', 'periodQ'])

# Detach the label column: pop() removes 'PerStatus' from the feature frame
# and returns it as the target Series.
target = dataset.pop('PerStatus')

In [9]:
# Show (rows, columns) of the feature frame once the dropped columns and the
# label have been removed.
dataset.shape

(57728, 7)

In [10]:
# Shuffle features and labels together so the rows stay aligned.
# A fixed random_state makes the permutation identical on every
# Restart & Run All; without it each run reports different metrics.
dataset, target = shuffle(dataset, target, random_state=42)

# DecisionTree

In [11]:
from sklearn.tree import DecisionTreeClassifier

# One seed for both the fold assignment and the tree's internal randomness,
# so the cross-validated predictions are reproducible.
CV_SEED = 42

# `shuffle` and `random_state` must be passed by keyword: newer scikit-learn
# releases make every KFold argument after n_splits keyword-only, so the old
# positional form KFold(10, True) raises a TypeError there.
kfold = KFold(n_splits=10, shuffle=True, random_state=CV_SEED)

# Out-of-fold labels and predictions, pooled across all 10 folds so the
# metrics in the next cell are computed once over every sample.
predicted = []
expected = []

for train, test in kfold.split(dataset):
    # `train` / `test` are positional index arrays, hence .iloc.
    x_train = dataset.iloc[train]
    y_train = target.iloc[train]
    x_test = dataset.iloc[test]
    y_test = target.iloc[test]

    # A fresh, untrained tree per fold so no state leaks between folds.
    tree = DecisionTreeClassifier(random_state=CV_SEED)
    tree.fit(x_train, y_train)

    expected.extend(y_test)
    predicted.extend(tree.predict(x_test))



In [12]:
# Evaluate the pooled out-of-fold predictions (`expected` vs `predicted`).

# Headline F1 scores, followed by the per-class report and confusion matrix.
print('Macro-average: {0}'.format(metrics.f1_score(expected, predicted, average='macro')))
print('Micro-average: {0}'.format(metrics.f1_score(expected, predicted, average='micro')))
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
accuracy = metrics.accuracy_score(expected, predicted)
print('Accuracy: %.2f%%' % (accuracy*100))

# No `average` argument: scikit-learn's default ('binary') scores only the
# positive class (pos_label=1).
print('\n')
print('precision:', metrics.precision_score(expected, predicted))
print('recall:', metrics.recall_score(expected, predicted))

# Macro: unweighted mean of the per-class scores.
print('Average = macro')
print('precision:', metrics.precision_score(expected, predicted, average='macro'))
print('recall:', metrics.recall_score(expected, predicted, average='macro'))
print('F1-score:', metrics.f1_score(expected, predicted, average='macro'))

# Micro: computed from the global TP/FP/FN counts.
print('\n')
print('Average = micro')
print('precision:', metrics.precision_score(expected, predicted, average='micro'))
print('recall:', metrics.recall_score(expected, predicted, average='micro'))
print('F1-score:', metrics.f1_score(expected, predicted, average='micro'))

# Weighted: per-class scores averaged by class support.
# The recall line previously used average='micro' (copy-paste slip); it now
# uses 'weighted' to match the section label and its sibling lines.
print('\n')
print('Average = weighted')
print('precision:', metrics.precision_score(expected, predicted, average='weighted'))
print('recall:', metrics.recall_score(expected, predicted, average='weighted'))
print('F1-score:', metrics.f1_score(expected, predicted, average='weighted'))

# Positive-class F-beta (beta=1.5 weights recall above precision) shown next
# to the positive-class F1 for comparison.
print('\n')
print('Fbeta score:', metrics.fbeta_score(expected, predicted, beta=1.5))
print('F1-score:', metrics.f1_score(expected, predicted))

Macro-average: 0.4999890065594196
Micro-average: 0.9727515243902439
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     56932
           1       0.01      0.01      0.01       796

    accuracy                           0.97     57728
   macro avg       0.50      0.50      0.50     57728
weighted avg       0.97      0.97      0.97     57728

[[56144   788]
 [  785    11]]
Accuracy: 97.28%


precision: 0.01376720901126408
recall: 0.013819095477386936
Average = macro
precision: 0.4999890516415382
recall: 0.4999890109579726
F1-score: 0.4999890065594196


Average = micro
precision: 0.9727515243902439
recall: 0.9727515243902439
F1-score: 0.9727515243902439


Average = weighted
precision: 0.9728020601970584
recall: 0.9727515243902439
F1-score: 0.9727767909456301


Fbeta score: 0.013803088803088805
F1-score: 0.013793103448275864
