In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVR
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from math import sqrt

In [4]:
def data_split(data, all = False):

    data_copy = data.copy()
    
    if(all == False):            
        train_data = data_copy[:-365]
        test_data = data_copy[-365:]
    else:
        num_data = data_copy.shape[0]
        split_val = int(num_data*0.8)
        train_data = data_copy[:split_val]
        test_data = data_copy[split_val:]

    X_train = train_data[['Open','High','Low','Volume','Turnover']]
    Y_train = train_data[['Close']]

    X_test = test_data[['Open','High','Low','Volume','Turnover']]
    Y_test = test_data[['Close']]

    return X_train,Y_train,X_test,Y_test
    

In [5]:
def readData(path):
    data = pd.read_csv(path , )
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace = True)
    data = data.loc['2012-03-01': '2021-01-07']
    data.dropna(inplace = True)
    return data

In [6]:
def volatility(data, Rf):
    
    Nifty = readData('../datasets/inidices/NIFTY 50.csv')
    Nifty = Nifty.loc['2019-1-1':'2021-1-1']

    data = data.loc['2019-1-1':'2021-1-1']

    beta = np.cov(Nifty['Close'], data['Close'])[0][1]/data['Close'].var()

    alpha = (data['Close'][-1] - data['Close'][0])/data['Close'][0] - Rf + beta * ((Nifty['Close'][-1] - Nifty['Close'][0])/Nifty['Close'][0] - Rf)

    return (alpha * 100, beta)

In [20]:
def SVRModel(Xtrain, Ytrain, Xtest, Ytest):
    
    i = 0.0001
    temp = []

    while(i <= 100):
        
        PipelineObject = Pipeline([('Scaler', StandardScaler()), ('svr', LinearSVR(C = i))])
        PipelineObject.fit(Xtrain,Ytrain)
        predictedSVR = PipelineObject.predict(Xtest)
        temp.append([PipelineObject, sqrt(metrics.mean_squared_error(Ytest, predictedSVR)), i])
        i = i * 10
    
    return (min(temp, key = lambda x: x[1]))

In [21]:
def SVRModelFinal(data, c):

    PipelineObject = Pipeline([('Scaler', StandardScaler()), ('svr', LinearSVR(C = c))])
    Xtrain = data[['Open','High','Low','Volume','Turnover']]
    Ytrain = data[['Close']]
    PipelineObject.fit(Xtrain,Ytrain)

    return PipelineObject

In [8]:
def LRModel(Xtrain, Ytrain, Xtest, Ytest):
    
    Regressor = LinearRegression()
    model = Regressor.fit(Xtrain, Ytrain)
    predictedLR = Regressor.predict(Xtest)
    
    return (model, sqrt(metrics.mean_squared_error(Ytest, predictedLR)))

In [22]:
def LRModelFinal(data):

    Regressor = LinearRegression()
    Xtrain = data[['Open','High','Low','Volume','Turnover']]
    Ytrain = data[['Close']]
    model = Regressor.fit(Xtrain, Ytrain['Close'].tolist())
    
    return model

In [5]:
data = readData('../datasets/companies/LICHSGFIN.csv')

In [6]:
X_train,Y_train,X_test,Y_test = data_split(data, True)

In [7]:
volatility(data, 0.08)

58.232064297892514 4.325034575578774


In [10]:
LRModel(X_train, Y_train, X_test, Y_test)

(LinearRegression(), 4.0421137932426365)

In [11]:
SVRModel(X_train, Y_train['Close'].tolist(), X_test, Y_test['Close'].tolist())

(Pipeline(steps=[('Scaler', StandardScaler()), ('svr', LinearSVR())]),
 6.033771591164614)

In [26]:
paths = ['../datasets/companies/BAJAJ-AUTO.csv', '../datasets/companies/BAJFINANCE.csv', '../datasets/companies/HDFCBANK.csv', '../datasets/companies/INFY.csv', '../datasets/companies/ITC.csv', '../datasets/companies/LICHSGFIN.csv', '../datasets/companies/PANACEABIO.csv', '../datasets/companies/PFIZER.csv', '../datasets/companies/RELIANCE.csv', '../datasets/companies/SBIN.csv', '../datasets/companies/TATAMOTORS.csv', '../datasets/companies/TCS.csv']

modelSelection = []

for i in paths:
    
    temp = []
    
    data = readData(i)
    X_train,Y_train,X_test,Y_test = data_split(data, True)
    
    LR = LRModel(X_train, Y_train, X_test, Y_test)
    SVR = SVRModel(X_train, Y_train['Close'].tolist(), X_test, Y_test['Close'].tolist())
    
    temp.append(LR[1])
    temp.append(SVR[1])
    
    x = temp.index(min(temp))

    print(i)
    
    if(x == 0):
        print("Linear Regression -", temp[0])
        modelSelection.append([i, "Linear Regression", LRModelFinal(data)])
    elif(x == 1):
        print("SVR -", temp[1])
        modelSelection.append([i, "SVR", SVRModelFinal(data, SVR[2])])



../datasets/companies/BAJAJ-AUTO.csv
Linear Regression - 25.99410710103258
../datasets/companies/BAJFINANCE.csv
Linear Regression - 37.31895351159051
../datasets/companies/HDFCBANK.csv
Linear Regression - 9.513818882261909




../datasets/companies/INFY.csv
Linear Regression - 5.524182098044429
../datasets/companies/ITC.csv
Linear Regression - 1.7158697912952463


  y = column_or_1d(y, warn=True)


../datasets/companies/LICHSGFIN.csv
SVR - 4.003558282205828
../datasets/companies/PANACEABIO.csv
Linear Regression - 2.6086584364382825


  y = column_or_1d(y, warn=True)


../datasets/companies/PFIZER.csv
Linear Regression - 49.953005709394226
../datasets/companies/RELIANCE.csv
SVR - 11.18687799099602


  y = column_or_1d(y, warn=True)


../datasets/companies/SBIN.csv
Linear Regression - 3.2655157523512206
../datasets/companies/TATAMOTORS.csv
SVR - 1.5403133295548517
../datasets/companies/TCS.csv
Linear Regression - 16.73451419402659


