In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVR
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from math import sqrt

In [2]:
def data_split(data, all = False):

    data_copy = data.copy()
    
    if(all == False):            
        train_data = data_copy[:-365]
        test_data = data_copy[-365:]
    else:
        num_data = data_copy.shape[0]
        split_val = int(num_data*0.8)
        train_data = data_copy[:split_val]
        test_data = data_copy[split_val:]

    X_train = train_data[['Open','High','Low','Volume','Turnover']]
    Y_train = train_data[['Close']]

    X_test = test_data[['Open','High','Low','Volume','Turnover']]
    Y_test = test_data[['Close']]

    return X_train,Y_train,X_test,Y_test
    

In [3]:
def readData(path):
    data = pd.read_csv(path , )
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace = True)
    data = data.loc['2012-03-01': '2021-01-07']
    data.dropna(inplace = True)
    return data

In [4]:
def volatility(data, Rf):
    
    Nifty = readData('../datasets/inidices/NIFTY 50.csv')
    Nifty = Nifty.loc['2019-1-1':'2021-1-1']

    data = data.loc['2019-1-1':'2021-1-1']

    beta = np.cov(Nifty['Close'], data['Close'])[0][1]/data['Close'].var()

    alpha = (data['Close'][-1] - data['Close'][0])/data['Close'][0] - Rf + beta * ((Nifty['Close'][-1] - Nifty['Close'][0])/Nifty['Close'][0] - Rf)

    print(alpha * 100, beta)

In [8]:
def SVRModel(Xtrain, Ytrain, Xtest, Ytest):
    
    PipelineObject = Pipeline([('Scaler', StandardScaler()), ('svr', LinearSVR())])
    PipelineObject.fit(Xtrain,Ytrain)
    predictedSVR = PipelineObject.predict(Xtest)
    
    return (PipelineObject, sqrt(metrics.mean_squared_error(Ytest, predictedSVR)))

In [9]:
def LRModel(Xtrain, Ytrain, Xtest, Ytest):

    Regressor = LinearRegression()
    model = Regressor.fit(Xtrain, Ytrain)
    predictedLR = Regressor.predict(Xtest)
    
    return (model, sqrt(metrics.mean_squared_error(Ytest, predictedLR)))

In [5]:
data = readData('../datasets/companies/LICHSGFIN.csv')

In [6]:
X_train,Y_train,X_test,Y_test = data_split(data, True)

In [7]:
volatility(data, 0.08)

58.232064297892514 4.325034575578774


In [10]:
LRModel(X_train, Y_train, X_test, Y_test)

(LinearRegression(), 4.0421137932426365)

In [11]:
SVRModel(X_train, Y_train['Close'].tolist(), X_test, Y_test['Close'].tolist())

(Pipeline(steps=[('Scaler', StandardScaler()), ('svr', LinearSVR())]),
 6.033771591164614)