# Ordinal Regression

### Ordinal regression is a statistical technique that is used to predict behavior of ordinal level dependent variables with a set of independent variables. The dependent variable is the order response category variable and the independent variable may be categorical or continuous.

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

import yfinance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2014-01-01'
end = '2018-08-27'

# Read data 
dataset = yf.download(symbol,start,end)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02 00:00:00-05:00,3.85,3.98,3.84,3.95,3.95,20548400
2014-01-03 00:00:00-05:00,3.98,4.0,3.88,4.0,4.0,22887200
2014-01-06 00:00:00-05:00,4.01,4.18,3.99,4.13,4.13,42398300
2014-01-07 00:00:00-05:00,4.19,4.25,4.11,4.18,4.18,42932100
2014-01-08 00:00:00-05:00,4.23,4.26,4.14,4.18,4.18,30678700


In [3]:
dataset.index = pd.Index(dataset.index.date, name="Date")
dataset.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,3.85,3.98,3.84,3.95,3.95,20548400
2014-01-03,3.98,4.0,3.88,4.0,4.0,22887200
2014-01-06,4.01,4.18,3.99,4.13,4.13,42398300
2014-01-07,4.19,4.25,4.11,4.18,4.18,42932100
2014-01-08,4.23,4.26,4.14,4.18,4.18,30678700


In [4]:
dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)
dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)
dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)
dataset['Returns'] = dataset['Adj Close'].pct_change()
dataset = dataset.dropna()

In [5]:
dataset.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Increase_Decrease,Buy_Sell_on_Open,Buy_Sell,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-08-20,19.790001,20.08,19.35,19.98,19.98,62983200,0,1,1,0.010622
2018-08-21,19.98,20.42,19.860001,20.4,20.4,55629000,1,1,1,0.021021
2018-08-22,20.280001,20.92,20.209999,20.9,20.9,62002700,1,1,1,0.02451
2018-08-23,21.190001,22.32,21.139999,22.290001,22.290001,113444100,1,1,1,0.066507
2018-08-24,22.91,24.0,22.67,23.98,23.98,164328200,0,0,0,0.075819


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [7]:
n = len(dataset['Adj Close'])
X = np.array(dataset['Open'].values)
y = dataset['Increase_Decrease'].values

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                   test_size=0.4, random_state=42)

In [9]:
# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1,1))
X_test = scaler.transform(X_test.reshape(-1,1))

# Fit an ordinal logistic regression model
clf = LogisticRegression(multi_class='ovr', solver='liblinear')
clf.fit(X_train, y_train)

LogisticRegression(multi_class='ovr', solver='liblinear')

In [10]:
# Evaluate the model on the test set
accuracy = clf.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

Accuracy: 0.5299145299145299
