# Predicting stock price

## Linear Regression to predict values

The Dataset contains stock price of TCS for a 5 year period

In [65]:
#importing the required packages/libraries
import pandas as pd
import numpy as np
import quandl
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
import seaborn as sns

# Importing the Dataset

In [142]:
df = pd.read_csv("new_book.csv")
#filling null values
df['Adj Close'].fillna(df['Adj Close'].mean())

df.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,01-01-2015,1283.5,1283.5,1270.5,1272.780029,1100.664063,366830
1,02-01-2015,1275.5,1295.469971,1275.300049,1289.719971,1115.313232,925740
2,05-01-2015,1290.5,1299.949951,1262.319946,1270.119995,1098.363892,1754242
3,06-01-2015,1264.550049,1264.550049,1220.0,1223.300049,1057.875244,2423784
4,07-01-2015,1235.0,1239.569946,1203.719971,1208.849976,1045.379272,2636332


In [143]:
#Create a new df for manipulation/adding/removing coloumns
new_df = df[['Adj Close']]
new_df.head()

Unnamed: 0,Adj Close
0,1100.664063
1,1115.313232
2,1098.363892
3,1057.875244
4,1045.379272


In [204]:
#variable to predict 'n' days into future
forecast_out  = 10
#add coloumn with target/dependent varible shifted by 'n' units
new_df['Prediction'] = new_df['Adj Close'].shift(-forecast_out)
new_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,Adj Close,Prediction
0,1100.664063,1097.87085
1,1115.313232,1094.861206
2,1098.363892,1085.721069
3,1057.875244,1081.154907
4,1045.379272,1086.931396


In [205]:
### Create independent dataset X ###
#convert dataset to numpy array

X = np.array(new_df.drop(['Prediction'],1))

#Removing the last 'n' rows

X = X[:-forecast_out]

print(X.shape)

(1220, 1)


In [206]:
### Create Dependent dataset Y ###
#Convert dataset to numpy array

Y = np.array(new_df['Prediction'])
Y = Y[:-forecast_out]
Y.shape

(1220,)

# Create test/train data

In [207]:
#split the data into 80% train and 20% test
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.2)

## Create a Support Vector Machine

In [208]:
svr_rbf = SVR(kernel = 'rbf', C = 1e3, gamma = 0.1)
svr_rbf.fit(x_train,y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [209]:
#testing accuracy of the SVM 
svr_confidence = svr_rbf.score(x_test,y_test)
print("SVM Confidence : ",svr_confidence)

SVM Confidence :  0.9527416316243583


# Create a Linear Regression model

In [210]:
lr = LinearRegression()
lr.fit(x_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [211]:
#testing accuracy of Linear Regression model
lr_confidence = lr.score(x_test,y_test)
print("Linear Regression confidence : ",lr_confidence)

Linear Regression confidence :  0.9763542052005894


In [212]:
x_forecast = np.array(new_df.drop(['Prediction'],1))[-forecast_out:]
print(x_forecast)

[[2160.044922]
 [2162.838623]
 [2223.999756]
 [2217.863525]
 [2226.643799]
 [2210.580322]
 [2196.961182]
 [2193.519043]
 [2178.253662]
 [2156.802246]]


# Linear Regression prediction for the next 'n' days

In [213]:
predict = lr.predict(x_forecast)
predict

array([2168.44012687, 2171.22703679, 2232.23949614, 2226.1181814 ,
       2234.87711186, 2218.85268279, 2205.26664891, 2201.83287724,
       2186.60460412, 2165.20533333])

In [214]:
svm_pred = svr_rbf.predict(x_forecast)
svm_pred

array([2102.24910329, 2156.31676773, 2112.23522887, 2145.81250393,
       2048.27198625, 2133.59366006, 2097.35210481, 2092.53122717,
       2089.36140475, 2051.60714493])

# Compare predictions to actual

In [215]:
actual = df['Adj Close'].tail(forecast_out)
actual = np.array(actual)
final = [actual,svm_pred,predict]
#Creating a sample Dataframe to compare values
pred_df = pd.DataFrame(data=actual,columns=['Actual'])
pred_df['SVM_Pred'] = svm_pred
pred_df['LR_Pred'] = predict
pred_df

Unnamed: 0,Actual,SVM_Pred,LR_Pred
0,2160.044922,2102.249103,2168.440127
1,2162.838623,2156.316768,2171.227037
2,2223.999756,2112.235229,2232.239496
3,2217.863525,2145.812504,2226.118181
4,2226.643799,2048.271986,2234.877112
5,2210.580322,2133.59366,2218.852683
6,2196.961182,2097.352105,2205.266649
7,2193.519043,2092.531227,2201.832877
8,2178.253662,2089.361405,2186.604604
9,2156.802246,2051.607145,2165.205333
