# Logistic Regression

## Multiple Class Prediction

Step 1: Data Pre-Processing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# fetch yahoo data
import yfinance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2014-01-01'
end = '2018-08-27'

# Read data 
dataset = yf.download(symbol,start,end)

# Only keep close columns 
dataset.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,3.95,3.95,3.98,3.84,3.85,20548400
2014-01-03,4.0,4.0,4.0,3.88,3.98,22887200
2014-01-06,4.13,4.13,4.18,3.99,4.01,42398300
2014-01-07,4.18,4.18,4.25,4.11,4.19,42932100
2014-01-08,4.18,4.18,4.26,4.14,4.23,30678700


In [3]:
dataset['Buy/Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,-1)

In [4]:
dataset['Increase/Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)

In [5]:
dataset.head()

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,Buy/Sell,Increase/Decrease
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-01-02,3.95,3.95,3.98,3.84,3.85,20548400,1,1
2014-01-03,4.0,4.0,4.0,3.88,3.98,22887200,1,1
2014-01-06,4.13,4.13,4.18,3.99,4.01,42398300,1,1
2014-01-07,4.18,4.18,4.25,4.11,4.19,42932100,-1,0
2014-01-08,4.18,4.18,4.26,4.14,4.23,30678700,-1,0


In [6]:
X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 7].values

In [7]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [8]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

Step 2: Logistic Regression Model

In [9]:
# fit final model
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

Step 3: Predection

In [10]:
# Predicting the Test set results
y_pred = model.predict(X_test)

In [11]:
# show the inputs and predicted outputs
for i in range(len(X_test)):
	print("X=%s, Predicted=%s" % (X_test[i], y_pred[i]))

X=[1.28092529 1.28092529 1.31536939 1.33290278], Predicted=0
X=[0.86500819 0.86500819 0.84102454 0.87833678], Predicted=0
X=[-0.63805896 -0.63805896 -0.61632405 -0.64319673], Predicted=0
X=[-0.93867239 -0.93867239 -0.9433192  -0.94413629], Predicted=0
X=[-1.03956318 -1.03956318 -1.04020663 -1.03462859], Predicted=0
X=[-0.95720334 -0.95720334 -0.92918975 -0.95886758], Predicted=0
X=[1.33445924 1.33445924 1.39207198 1.34132066], Predicted=0
X=[-0.12330995 -0.12330995 -0.14399773 -0.12760104], Predicted=0
X=[-0.93249541 -0.93249541 -0.92918975 -0.93150946], Predicted=0
X=[1.41064207 1.41064207 1.37592409 1.34763402], Predicted=0
X=[-0.63805896 -0.63805896 -0.64256443 -0.65161461], Predicted=0
X=[-0.06154003 -0.06154003 -0.07738763 -0.05604895], Predicted=0
X=[-0.81719159 -0.81719159 -0.81615441 -0.82418138], Predicted=0
X=[0.10523864 0.10523864 0.09822084 0.11230888], Predicted=0
X=[-0.90161045 -0.90161045 -0.87267208 -0.89994236], Predicted=0
X=[0.65910859 0.65910859 0.63312029 0.6636805

In [12]:
("X=%s, Predicted=%s" % (X_test[0], y_pred[0]))

'X=[1.28092529 1.28092529 1.31536939 1.33290278], Predicted=0'

Step 4: Evaluating The Predection

In [13]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

In [14]:
print(cm)

[[153   0]
 [140   0]]


In [15]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.52      1.00      0.69       153
           1       0.00      0.00      0.00       140

   micro avg       0.52      0.52      0.52       293
   macro avg       0.26      0.50      0.34       293
weighted avg       0.27      0.52      0.36       293



In [16]:
from sklearn.metrics import accuracy_score
# Print the accuracy from the testing data.
print(accuracy_score(model.predict(X_test), y_test))

0.5221843003412969


In [17]:
from sklearn import metrics

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:",metrics.precision_score(y_test, y_pred))
print("Recall:",metrics.recall_score(y_test, y_pred))

Accuracy: 0.5221843003412969
Precision: 0.0
Recall: 0.0
