In [55]:
pip install quandl

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [56]:
# This program predicts stock prices by using machine learning models

# Import the dependencies
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [57]:
# Download the price history stored under the Quandl code "WIKI/AMZN".
# The call needs network access; the result is a DataFrame indexed by Date.
df = quandl.get("WIKI/AMZN")

# Preview the first five rows to check the download and see the available columns
print(df.head(5))

             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1997-05-16  22.38  23.75  20.50  20.75  1225000.0          0.0          1.0   
1997-05-19  20.50  21.25  19.50  20.50   508900.0          0.0          1.0   
1997-05-20  20.75  21.00  19.63  19.63   455600.0          0.0          1.0   
1997-05-21  19.25  19.75  16.50  17.13  1571100.0          0.0          1.0   
1997-05-22  17.25  17.38  15.75  16.75   981400.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1997-05-16   1.865000   1.979167  1.708333    1.729167   14700000.0  
1997-05-19   1.708333   1.770833  1.625000    1.708333    6106800.0  
1997-05-20   1.729167   1.750000  1.635833    1.635833    5467200.0  
1997-05-21   1.604167   1.645833  1.375000    1.427500   18853200.0  
1997-05-22   1.437500   1.

In [58]:
# Narrow the frame down to a single column: the adjusted closing price.
# The list selector keeps the result a DataFrame rather than a Series.
df = df.loc[:, ['Adj. Close']]

# Preview the reduced frame
print(df.head())

            Adj. Close
Date                  
1997-05-16    1.729167
1997-05-19    1.708333
1997-05-20    1.635833
1997-05-21    1.427500
1997-05-22    1.395833


In [59]:
# Forecast horizon: how many rows ahead the models should predict
forecast_day = 30

# Build the target (dependent) column. Shifting by a negative period moves values
# up, so each row's 'Prediction' is the 'Adj. Close' found forecast_day rows later;
# the final forecast_day rows have no later value and are left as NaN.
df['Prediction'] = df['Adj. Close'].shift(periods=-forecast_day)

# Preview the frame with its new target column
print(df.head())

            Adj. Close  Prediction
Date                              
1997-05-16    1.729167    1.541667
1997-05-19    1.708333    1.515833
1997-05-20    1.635833    1.588333
1997-05-21    1.427500    1.911667
1997-05-22    1.395833    2.000000


In [60]:
## Create the independent data set (x) ##
# Convert every column except the target into a 2-D numpy array.
# `columns=` is used because passing the axis positionally (`drop([...], 1)`) is
# deprecated and is rejected by newer pandas releases.
x = np.array(df.drop(columns=['Prediction']))
# Drop the last forecast_day rows: their 'Prediction' is NaN, so they cannot be
# used for training or scoring.
x = x[:-forecast_day]
print(x)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1350.47      ]
 [1338.99      ]
 [1386.23      ]]


  This is separate from the ipykernel package so we can avoid doing imports until


In [61]:
## Create the dependent data set (y) ##
# Cut off the trailing forecast_day targets (the NaN values produced by the shift)
# first, then copy what remains into a 1-D numpy array.
y = np.array(df['Prediction'].iloc[:-forecast_day])
print(y)

[1.54166667e+00 1.51583333e+00 1.58833333e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


In [62]:
# Split the data into 70% training and 30% testing.
# random_state pins the shuffle so the split, and therefore every score below,
# is reproducible across kernel restarts.
# NOTE(review): the rows are time-ordered and the split shuffles them, so the test
# set is interleaved with the training dates; consider shuffle=False for a
# chronological hold-out.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

In [63]:
# Create and train the Support Vector Regressor (RBF kernel).
# NOTE(review): the inputs are raw, unscaled prices; with gamma=0.1 the kernel may
# be very narrow on that scale. Confirm whether the features should be standardized.
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(x_train, y_train)
# Leave the fitted estimator as the cell's last expression so its repr is shown
svr_rbf

SVR(C=1000.0, gamma=0.1)

In [64]:
# Evaluate the SVR on the held-out split. score() returns the coefficient of
# determination (R^2): 1.0 is a perfect fit, and the value can be negative for a
# model that does worse than always predicting the mean.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.9561814285942642


In [65]:
# Create the Linear Regression model and fit it on the training split in one step
lr = LinearRegression().fit(x_train, y_train)
# Leave the fitted estimator as the cell's last expression so its repr is shown
lr

LinearRegression()

In [66]:
# Evaluate the linear model on the held-out split. score() returns the coefficient
# of determination (R^2): 1.0 is a perfect fit, and the value can be negative for a
# model that does worse than always predicting the mean.
lr_confidence = lr.score(X=x_test, y=y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9876270313119935


In [67]:
# Set x_forecast to the last forecast_day rows of the 'Adj. Close' column: the rows
# that were cut from x because they have no known 'Prediction' yet.
# `columns=` is used because passing the axis positionally (`drop([...], 1)`) is
# deprecated and is rejected by newer pandas releases.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_day:]
print(x_forecast)

[[1414.51]
 [1451.05]
 [1461.76]
 [1448.69]
 [1468.35]
 [1482.92]
 [1484.76]
 [1500.  ]
 [1521.95]
 [1511.98]
 [1512.45]
 [1493.45]
 [1500.25]
 [1523.61]
 [1537.64]
 [1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


  


In [68]:
# Run both fitted models on x_forecast. Each output value is that model's estimate
# of the price forecast_day rows after the corresponding input row.

# Linear Regression forecasts
lr_prediction = lr.predict(X=x_forecast)
print(lr_prediction)

# Support Vector Regressor forecasts
svm_prediction = svr_rbf.predict(X=x_forecast)
print(svm_prediction)

[1499.37920913 1538.20234921 1549.58154545 1535.69488861 1556.58331045
 1572.06369225 1574.01866154 1590.21090716 1613.53241578 1602.93945719
 1603.43882435 1583.25164149 1590.47652798 1615.29613807 1630.20277888
 1638.02265603 1645.31129153 1674.03021535 1694.74863986 1683.90068529
 1686.89688822 1677.6745331  1666.3697107  1637.9482822  1682.12633816
 1677.18579078 1637.06642105 1585.49348127 1649.56122476 1587.0765814 ]
[1004.96937736 1550.55346538  668.25681069 1076.23025389  668.22356323
  668.22356323  668.22356323  668.22356323  668.22356323  668.22356323
  668.22356323  668.22356323  668.22356323  668.22356323  668.22356323
  668.22356323  668.22356323  668.22356323  668.22356323  668.22356323
  668.22356323  668.22356323  668.22356323  668.22356323  668.22356323
  668.22356323  668.22356323  668.22356323  668.22356323  668.22356323]
