# Evaluation Methods 

In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split,StratifiedKFold

import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv('Weekly.csv') 

In [3]:
data.head()

Unnamed: 0,Year,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today,Direction
0,1990,0.816,1.572,-3.936,-0.229,-3.484,0.154976,-0.27,Down
1,1990,-0.27,0.816,1.572,-3.936,-0.229,0.148574,-2.576,Down
2,1990,-2.576,-0.27,0.816,1.572,-3.936,0.159837,3.514,Up
3,1990,3.514,-2.576,-0.27,0.816,1.572,0.16163,0.712,Up
4,1990,0.712,3.514,-2.576,-0.27,0.816,0.153728,1.178,Up


Usando os próprios exemplos de treinamento como teste

In [4]:
X = data[["Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume"]]
Y = data[["Direction"]]
logisticRegr = LogisticRegression()
logisticRegr.fit(X, Y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [6]:
predictions = logisticRegr.predict(X)
print(confusion_matrix(Y, predictions))
print(classification_report(Y, predictions))

[[ 55 429]
 [ 47 558]]
              precision    recall  f1-score   support

        Down       0.54      0.11      0.19       484
          Up       0.57      0.92      0.70       605

    accuracy                           0.56      1089
   macro avg       0.55      0.52      0.44      1089
weighted avg       0.55      0.56      0.47      1089



Holdout

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=0)
X_train.shape, y_train.shape

((653, 6), (653, 1))

In [11]:
X_test.shape, y_test.shape

((436, 6), (436, 1))

In [12]:
logisticRegr.fit(X_train, y_train)
predictions = logisticRegr.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

[[ 29 167]
 [ 33 207]]
              precision    recall  f1-score   support

        Down       0.47      0.15      0.22       196
          Up       0.55      0.86      0.67       240

    accuracy                           0.54       436
   macro avg       0.51      0.51      0.45       436
weighted avg       0.51      0.54      0.47       436



k-fold

In [15]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
lst_accu_stratified = []

In [20]:
for train_index, test_index in skf.split(X, Y):
    x_train_fold, x_test_fold = X.iloc[train_index], X.iloc[test_index]
    y_train_fold, y_test_fold = Y.iloc[train_index], Y.iloc[test_index]
    logisticRegr.fit(x_train_fold, y_train_fold)
    lst_accu_stratified.append(logisticRegr.score(x_test_fold, y_test_fold))
    predictions = logisticRegr.predict(x_test_fold)
    print(classification_report(y_test_fold, predictions))

              precision    recall  f1-score   support

        Down       0.33      0.02      0.04        49
          Up       0.55      0.97      0.70        61

    accuracy                           0.55       110
   macro avg       0.44      0.49      0.37       110
weighted avg       0.45      0.55      0.41       110

              precision    recall  f1-score   support

        Down       0.60      0.12      0.20        49
          Up       0.57      0.93      0.71        61

    accuracy                           0.57       110
   macro avg       0.58      0.53      0.46       110
weighted avg       0.58      0.57      0.48       110

              precision    recall  f1-score   support

        Down       0.50      0.08      0.14        49
          Up       0.56      0.93      0.70        61

    accuracy                           0.55       110
   macro avg       0.53      0.51      0.42       110
weighted avg       0.53      0.55      0.45       110

              preci

In [23]:
print('List of possible accuracy:', lst_accu_stratified)
print('\nMaximum Accuracy That can be obtained from this model is:',
      max(lst_accu_stratified)*100, '%')
print('\nMinimum Accuracy:',
      min(lst_accu_stratified)*100, '%')
print('\nOverall Accuracy:',
      mean(lst_accu_stratified)*100, '%')
print('\nStandard Deviation is:', stdev(lst_accu_stratified))

List of possible accuracy: [0.5454545454545454, 0.5727272727272728, 0.5545454545454546, 0.5454545454545454, 0.5321100917431193, 0.5555555555555556, 0.5462962962962963, 0.5185185185185185, 0.5925925925925926, 0.5648148148148148]

Maximum Accuracy That can be obtained from this model is: 59.25925925925925 %

Minimum Accuracy: 51.85185185185185 %


NameError: name 'mean' is not defined