# Predicting stock price

## Linear Regression to predict values

The Dataset contains stock price of TCS for a 5 year period

In [1]:
#importing the required packages/libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

# Importing the Dataset

In [2]:
df = pd.read_csv("new_book.csv", na_values=['null'])
#filling null values
df['Adj Close'].fillna(df['Adj Close'].mean())

df.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,01-01-2015,1283.5,1283.5,1270.5,1272.780029,1100.664063,366830
1,02-01-2015,1275.5,1295.469971,1275.300049,1289.719971,1115.313232,925740
2,05-01-2015,1290.5,1299.949951,1262.319946,1270.119995,1098.363892,1754242
3,06-01-2015,1264.550049,1264.550049,1220.0,1223.300049,1057.875244,2423784
4,07-01-2015,1235.0,1239.569946,1203.719971,1208.849976,1045.379272,2636332


In [3]:
#Create a new df for manipulation/adding/removing coloumns
new_df = df[['Adj Close']]
new_df.head()

Unnamed: 0,Adj Close
0,1100.664063
1,1115.313232
2,1098.363892
3,1057.875244
4,1045.379272


In [4]:
#variable to predict 'n' days into future
forecast_out  = 10
#add coloumn with target/dependent varible shifted by 'n' units
new_df['Prediction'] = new_df['Adj Close'].shift(-forecast_out)
new_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,Adj Close,Prediction
0,1100.664063,1097.87085
1,1115.313232,1094.861206
2,1098.363892,1085.721069
3,1057.875244,1081.154907
4,1045.379272,1086.931396


In [5]:
### Create independent dataset X ###
#convert dataset to numpy array

X = np.array(new_df.drop(['Prediction'],1))

#Removing the last 'n' rows

X = X[:-forecast_out]

print(X.shape)

(1220, 1)


In [6]:
### Create Dependent dataset Y ###
#Convert dataset to numpy array

Y = np.array(new_df['Prediction'])
Y = Y[:-forecast_out]
Y.shape

(1220,)

# Create test/train data

In [7]:
#split the data into 80% train and 20% test
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.2)

## Create a Support Vector Machine

In [8]:
svr_rbf = SVR(kernel = 'rbf', C = 1e3, gamma = 0.1)
svr_rbf.fit(x_train,y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [9]:
#testing accuracy of the SVM 
svr_confidence = svr_rbf.score(x_test,y_test)
print("SVM Confidence : ",svr_confidence)

SVM Confidence :  0.9230649443590662


# Create a Linear Regression model

In [10]:
lr = LinearRegression()
lr.fit(x_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [11]:
#testing accuracy of Linear Regression model
lr_confidence = lr.score(x_test,y_test)
print("Linear Regression confidence : ",lr_confidence)

Linear Regression confidence :  0.9731098717115386


In [12]:
x_forecast = np.array(new_df.drop(['Prediction'],1))[-forecast_out:]
print(x_forecast)

[[2160.044922]
 [2162.838623]
 [2223.999756]
 [2217.863525]
 [2226.643799]
 [2210.580322]
 [2196.961182]
 [2193.519043]
 [2178.253662]
 [2156.802246]]


# Linear Regression prediction for the next 'n' days

In [13]:
predict = lr.predict(x_forecast)
predict

array([2166.72329547, 2169.50846639, 2230.48285445, 2224.36535937,
       2233.11882432, 2217.10439437, 2203.52683806, 2200.09520904,
       2184.87643824, 2163.49052042])

In [14]:
svm_pred = svr_rbf.predict(x_forecast)
svm_pred

array([2102.92440074, 2144.88900774, 2121.36361371, 1704.81882841,
       2130.29183625, 2216.57202306, 2097.59048116, 2082.85328614,
       2134.61391822, 2080.96471904])

# Compare predictions to actual

In [15]:
actual = df['Adj Close'].tail(forecast_out)
actual = np.array(actual)
final = [actual,svm_pred,predict]
#Creating a sample Dataframe to compare values
pred_df = pd.DataFrame(data=actual,columns=['Actual'])
pred_df['SVM_Pred'] = svm_pred
pred_df['LR_Pred'] = predict
pred_df

Unnamed: 0,Actual,SVM_Pred,LR_Pred
0,2160.044922,2102.924401,2166.723295
1,2162.838623,2144.889008,2169.508466
2,2223.999756,2121.363614,2230.482854
3,2217.863525,1704.818828,2224.365359
4,2226.643799,2130.291836,2233.118824
5,2210.580322,2216.572023,2217.104394
6,2196.961182,2097.590481,2203.526838
7,2193.519043,2082.853286,2200.095209
8,2178.253662,2134.613918,2184.876438
9,2156.802246,2080.964719,2163.49052
