# Predicting The Stock Market

I'll use historical price data for the S&P 500 Index to predict its future prices. Predicting whether the index will go up or down helps forecast how the stock market as a whole will perform.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression  
from sklearn.metrics import mean_absolute_error
%matplotlib inline
import seaborn as sns
from datetime import datetime

In [2]:
# Read the raw price history from the CSV that sits next to the notebook
stocks = pd.read_csv(
    'sphist.csv'
)

In [3]:
# Turn the Date column into real timestamps so it sorts chronologically
stocks['Date'] = pd.to_datetime(stocks['Date'])

# Order rows oldest-first and replace the old row labels with a fresh 0..N-1 index
stocks = (
    stocks
    .sort_values(by='Date')
    .reset_index(drop=True)
)

## Feature Creation

In [4]:
# Rolling indicators are built from the n rows *before* each row, so a row's
# own Close never leaks into its features. Windows count rows of the frame
# (presumably one row per trading day, not calendar days -- verify against
# sphist.csv).
windows=[5,30,365]

# Shifting Close down by one row makes every rolling window end on the
# previous row. The first n values of each indicator are NaN because a full
# window of prior prices does not exist yet. This is position-based, so it
# does not depend on the index labels being exactly 0..N-1.
prior_close=stocks['Close'].shift(1)

# Moving averages of the prior 5, 30, and 365 rows
for n in windows:
    stocks[f'ma_{n}']=prior_close.rolling(window=n).mean()

# Sample standard deviations (ddof=1) of the prior 5, 30, and 365 rows
for n in windows:
    stocks[f'std_dev_{n}']=prior_close.rolling(window=n).std()

# Drop every row with a missing value in any column (this removes the warm-up
# rows that lack a full 365-row history), then add short-term/long-term ratios.
# assign() builds the ratio columns on the new, filtered frame instead of
# mutating the frame in place.
stocks=stocks.dropna().assign(
    ma_5_365_ratio=lambda df: df['ma_5']/df['ma_365'],
    std_dev_5_365_ratio=lambda df: df['std_dev_5']/df['std_dev_365'],
)

In [5]:
# Chronological split: rows dated before 2013-01-01 train the model,
# rows dated on or after that day are held out for evaluation
cutoff = datetime(2013, 1, 1)
train = stocks.loc[stocks["Date"] < cutoff]
test = stocks.loc[stocks["Date"] >= cutoff]

In [6]:
# Baseline: linear regression of Close on the three moving averages
features = ['ma_5', 'ma_30', 'ma_365']
target = 'Close'

# fit() returns the fitted estimator, so construction and training chain
lr = LinearRegression().fit(train[features], train[target])

# Predict on the held-out rows and report the mean absolute error
predictions = lr.predict(test[features])
mae = mean_absolute_error(test[target], predictions)
mae

16.14243964355485

In [7]:
# Extended model: the three moving averages plus the three rolling
# standard deviations
features = [
    'ma_5', 'ma_30', 'ma_365',
    'std_dev_5', 'std_dev_30', 'std_dev_365',
]
target = 'Close'

# fit() returns the fitted estimator, so construction and training chain
lr = LinearRegression().fit(train[features], train[target])

# Predict on the held-out rows and report the mean absolute error
predictions = lr.predict(test[features])
mae = mean_absolute_error(test[target], predictions)
mae

16.214387095799886