In [1]:
import numpy as np 
import pandas as pd 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

# Load Data

In [57]:
# Read the LMND price history from the CSV under Data/
csv_path = 'Data/LMND.csv'
df = pd.read_csv(csv_path)
# Show the last five rows to check what was loaded
df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
60,2020-10-22,57.93,58.639999,52.321999,52.529999,52.529999,1420100
61,2020-10-23,53.77,53.869999,50.0,51.09,51.09,1417600
62,2020-10-26,50.549999,52.279999,48.939999,51.189999,51.189999,1125500
63,2020-10-27,51.639999,51.889999,49.799999,50.73,50.73,448100
64,2020-10-28,48.900002,50.200001,48.0,49.130001,49.130001,893300


In [58]:
# Keep only the adjusted close price. The explicit .copy() makes df an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
df = df[['Adj Close']].copy()
# Take a look at the reduced data
print(df.head())

   Adj Close
0  69.050003
1  65.099998
2  58.180000
3  56.860001
4  61.139999


In [59]:
# Number of rows to predict into the future (one row per date in this data)
forecast_out = 3
# Build the target column: the adjusted close 'forecast_out' rows later.
# shift(-n) moves values up, so the last n rows have no target (NaN).
# assign() returns a new frame instead of writing into a column subset of the
# loaded data, which avoids pandas' SettingWithCopyWarning and keeps this cell
# safe to re-run.
df = df.assign(Prediction=df['Adj Close'].shift(-forecast_out))
# Show the end of the data set, including the rows with NaN targets
df.tail()

Unnamed: 0,Adj Close,Prediction
60,52.529999,50.73
61,51.09,49.130001
62,51.189999,
63,50.73,
64,49.130001,


In [60]:
# Number of rows in the working frame
df.shape[0]

65

In [61]:
### Create the independent data set (X)  #######
# Feature matrix: every column except the target, as a 2-D numpy array.
# drop() is called with the 'columns' keyword because the positional axis
# form (drop(labels, 1)) is deprecated and rejected by pandas 2.x.
X = np.array(df.drop(columns=['Prediction']))

# Remove the last 'forecast_out' rows: their target value is NaN
X = X[:-forecast_out]
print(X)

[[69.050003]
 [65.099998]
 [58.18    ]
 [56.860001]
 [61.139999]
 [64.480003]
 [68.699997]
 [66.839996]
 [67.410004]
 [62.75    ]
 [60.669998]
 [59.259998]
 [63.220001]
 [61.48    ]
 [61.790001]
 [61.700001]
 [59.860001]
 [60.200001]
 [66.      ]
 [61.009998]
 [61.130001]
 [58.389999]
 [58.18    ]
 [58.700001]
 [56.189999]
 [54.700001]
 [50.830002]
 [47.27    ]
 [47.18    ]
 [50.18    ]
 [52.400002]
 [50.119999]
 [48.709999]
 [47.150002]
 [48.650002]
 [51.880001]
 [55.240002]
 [52.93    ]
 [54.07    ]
 [49.869999]
 [47.98    ]
 [50.93    ]
 [51.25    ]
 [49.610001]
 [49.720001]
 [51.459999]
 [51.580002]
 [54.      ]
 [52.439999]
 [53.599998]
 [60.099998]
 [67.809998]
 [66.169998]
 [66.730003]
 [68.      ]
 [62.41    ]
 [58.990002]
 [58.950001]
 [58.580002]
 [57.98    ]
 [52.529999]
 [51.09    ]]


In [62]:
### Create the dependent data set (y)  #####
# Target vector: the 'Prediction' column without its last 'forecast_out'
# entries, converted to a numpy array
y = np.array(df['Prediction'].iloc[:-forecast_out])
print(y)

[56.860001 61.139999 64.480003 68.699997 66.839996 67.410004 62.75
 60.669998 59.259998 63.220001 61.48     61.790001 61.700001 59.860001
 60.200001 66.       61.009998 61.130001 58.389999 58.18     58.700001
 56.189999 54.700001 50.830002 47.27     47.18     50.18     52.400002
 50.119999 48.709999 47.150002 48.650002 51.880001 55.240002 52.93
 54.07     49.869999 47.98     50.93     51.25     49.610001 49.720001
 51.459999 51.580002 54.       52.439999 53.599998 60.099998 67.809998
 66.169998 66.730003 68.       62.41     58.990002 58.950001 58.580002
 57.98     52.529999 51.09     51.189999 50.73     49.130001]


# Split Data into Train & Test

In [63]:
# Split the data into 70% training and 30% testing (test_size=0.30).
# random_state pins the shuffle so the split, and every score computed from
# it, is the same on each run.
# NOTE(review): the rows are consecutive dates and are shuffled here — consider
# shuffle=False if a chronological hold-out is intended.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# Create Regression Models

In [64]:
# Random forest regressor
from sklearn.ensemble import RandomForestRegressor

# Trees limited to depth 3, with a fixed random_state
rfr = RandomForestRegressor(
    random_state=0,
    max_depth=3,
)
rfr.fit(x_train, y_train)

RandomForestRegressor(max_depth=3, random_state=0)

In [65]:
# Score the random forest on the test split
rfr_confidence = rfr.score(X=x_test, y=y_test)
print("rfr confidence", rfr_confidence)

rfr confidence -0.06348297992799101


In [66]:
# Support Vector Regressor with an RBF kernel, fitted on the training split
# NOTE(review): the global numpy seed is kept from the original cell; SVR is
# not given a random_state here, so it is unclear that the seed affects the
# fit — confirm.
np.random.seed(42)
svr_rbf = SVR(C=1000.0, gamma=0.1, kernel='rbf')
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, gamma=0.1)

In [67]:
# Evaluate the SVR: score() gives the coefficient of determination (R^2)
# on the test split; 1.0 is the best possible value
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  -0.34149421881011266


In [68]:
# Linear regression model
np.random.seed(42)
lr = LinearRegression()
# Fit on the training split
lr.fit(X=x_train, y=y_train)

LinearRegression()

In [69]:
# Evaluate the linear regression: score() gives the coefficient of
# determination (R^2) on the test split; 1.0 is the best possible value
lr_confidence = lr.score(X=x_test, y=y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  -0.07509274268474475


In [70]:
# x_forecast holds the last 'forecast_out' rows of the feature column(s),
# i.e. the rows that were cut from X, which the models will predict from.
# drop() uses the 'columns' keyword; the positional axis form
# (drop(labels, 1)) is deprecated and rejected by pandas 2.x.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[51.189999]
 [50.73    ]
 [49.130001]]


In [71]:
# Run each fitted model on the forecast rows
lr_prediction = lr.predict(x_forecast)
svm_prediction = svr_rbf.predict(x_forecast)
rfr_prediction = rfr.predict(x_forecast)

# Print only the final value of each forecast, in the order:
# linear regression, support vector regressor, random forest
for model_forecast in (lr_prediction, svm_prediction, rfr_prediction):
    print(model_forecast[-1])

50.56572838060755
51.856298588542124
51.2470825240511
