# Project: Apple Stock Price Prediction

> The challenge of this project is to accurately predict the future closing value of a given stock across a given period of time in the future

**Data Dictionary**

1. Date
2. Open - It is the price at which the financial security opens in the market when trading begins
3. High - The high is the highest price at which a stock traded during a period.
4. Low - Low is the minimum price of a stock in a period
5. Close - Closing price generally refers to the last price at which a stock trades during a regular trading session
6. Adj Close - The adjusted closing price amends a stock's closing price to reflect that stock's value after accounting for any corporate actions. It is often used when examining historical returns or doing a detailed analysis of past performance.
7. Volume - Volume measures the number of shares traded in a stock or contracts traded in futures or options.

## Getting Started

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

## Get the data

In [None]:
# Load the price history from a CSV file in the working directory.
stocks = pd.read_csv("AAPL_stock.csv")
# Preview the first rows to check the columns were parsed as expected.
stocks.head()

In [None]:
# Print the column dtypes and non-null counts.
stocks.info()

In [None]:
# Count the missing values in each column.
stocks.isna().sum()

In [None]:
# Convert Date to pandas datetimes; later cells subtract dates and use the `.dt` accessor.
stocks['Date'] = pd.to_datetime(stocks['Date'])

In [None]:
# Compute the covered date range once and report it together with its length in days.
first_day = stocks.Date.min()
last_day = stocks.Date.max()
print(f'Dataframe contains stock prices between {first_day} to {last_day}')
print(f'Total Days = {(last_day - first_day).days} days')

In [None]:
# Report the extremes of the closing price over the whole data set.
closing = stocks.Close
print(f'The maximum price of the stocks is:{closing.max()}')
print(f'The minimum price of the stocks is:{closing.min()}')

In [None]:
# Show summary statistics for the data frame's columns.
stocks.describe()

In [None]:
# Box plot of the five price columns, one box per column.
plt.style.use('fivethirtyeight')
price_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close']
fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(stocks[price_cols])
# Without explicit labels the boxes are only numbered 1..5; name them after their columns.
ax.set_xticklabels(price_cols);

In [None]:
# Line chart of the closing price across the full date range.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(stocks['Date'], stocks['Close'], color='salmon')
ax.set_title('Stock Prices of Apple')
ax.set_xlabel('Date')
ax.set_ylabel('Price');

In [None]:
# Pairwise correlation between the numeric columns.
# `numeric_only=True` is stated explicitly because, from pandas 2.0 on, `corr()` no longer
# excludes non-numeric columns by default, and `stocks` still carries the datetime `Date` column.
corr_matrix = stocks.corr(numeric_only=True)
corr_matrix

In [None]:
# Annotated heatmap of the correlation matrix (values shown to 4 decimal places).
heatmap_style = dict(annot=True, linewidths=0.5, fmt=".4f", cmap="Blues")
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, **heatmap_style);

In [None]:
# Derive intraday spread features; each new column is (first price - second price).
spread_columns = {
    'open-high': ('Open', 'High'),
    'open-low': ('Open', 'Low'),
    'close-high': ('Close', 'High'),
    'close-low': ('Close', 'Low'),
    'high-low': ('High', 'Low'),
    'open-close': ('Open', 'Close'),
}
stocks2 = stocks.copy()
for spread_name, (minuend, subtrahend) in spread_columns.items():
    stocks2[spread_name] = stocks[minuend] - stocks[subtrahend]
# Keep only the derived spreads (plus the untouched columns); the raw prices are dropped.
stocks2 = stocks2.drop(columns=['Open', 'High', 'Low', 'Close', 'Adj Close'])
stocks2.head()

In [None]:
# Heatmap of the correlations between the derived spread features.
plt.figure(figsize=(8,6))
# `numeric_only=True`: stocks2 still contains the datetime `Date` column, which pandas >= 2.0
# no longer excludes from `corr()` by default.
sns.heatmap(stocks2.corr(numeric_only=True),cmap='Blues',annot=True)

In [None]:
# Fresh copy of the raw data with weekday name, month number and year split out of Date.
trade_dates = stocks['Date']
stocks2 = stocks.assign(
    Day=trade_dates.dt.day_name(),
    Month=trade_dates.dt.month,
    Year=trade_dates.dt.year,
)
stocks2.head()

In [None]:
# Closing prices for 2020 (top panel) and 2019 (bottom panel), stacked vertically.
stocks2019 = stocks2.loc[stocks2['Year'] == 2019]
stocks2020 = stocks2.loc[stocks2['Year'] == 2020]

fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, figsize=(20, 20))

# (axes, data for that year, line colour, panel title)
panels = [
    (ax0, stocks2020, 'salmon', 'Stock Prices of Apple in 2020'),
    (ax1, stocks2019, 'lightblue', 'Stock Prices of Apple in 2019'),
]
for panel_ax, year_frame, line_colour, panel_title in panels:
    panel_ax.plot(year_frame['Date'], year_frame['Close'], color=line_colour)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel('Price')

# Only the bottom panel carries the x-axis label.
ax1.set_xlabel('Date')

fig.suptitle('Comparison of stock prices in 2019 and 2020',
             fontsize=24,
             fontweight='bold');

In [None]:
# Group the rows by calendar month (Year, Month) for the monthly open/close lookups below.
result = stocks2.groupby(['Year','Month'])

In [None]:
# Opening price of the first row in each (Year, Month) group, flattened back to columns.
monthly_open = result['Open'].apply(lambda month_prices: month_prices.iloc[0])
openPrice = monthly_open.reset_index()
openPrice.head()

In [None]:
# Closing price of the last row in each (Year, Month) group, flattened back to columns.
monthly_close = result['Close'].apply(lambda month_prices: month_prices.iloc[-1])
closePrice = monthly_close.reset_index()
closePrice.head()

In [None]:
# Pair each month's opening price with its closing price on (Year, Month).
diff = pd.merge(openPrice,closePrice,how='left',on=['Year','Month'])
# Percentage change from the month's opening price to its closing price.
diff['return'] = 100*(diff.Close - diff.Open)/diff.Open
# Drops the first row and keeps nothing past position 492.
# NOTE(review): presumably this trims an incomplete first month; the hard-coded 493 will
# silently cut off newer months if the CSV grows — confirm against the data.
diff=diff[1:493]
diff.head()

In [None]:
# Reshape to a Year x Month grid of monthly returns.
month_names = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
diff = diff.pivot(index='Year',columns='Month',values='return')
# Label each column from its month number instead of assigning 12 names positionally,
# so the cell still works (and stays correctly labelled) if a calendar month is absent.
diff.columns = [month_names[month_number - 1] for month_number in diff.columns]
diff.head()

In [None]:
# Year-by-month heatmap of the percentage returns (values shown to 2 decimal places).
return_heatmap_style = dict(annot=True, linewidths=0.5, fmt=".2f", cmap="YlGnBu")
plt.figure(figsize=(19, 22))
sns.heatmap(diff, **return_heatmap_style);

In [None]:
#Building the regression model
from sklearn.model_selection import train_test_split

#For Preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

#For model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

In [None]:
# Feature: the row index as a single column (reshaped to 2-D); target: the closing price.
X = np.array(stocks.index).reshape(-1,1)
y = stocks.Close
# Fix random_state so the same rows land in the train/test sets on every run; no seed has
# been set before this cell, so the split was otherwise different each time.
# NOTE(review): this is a shuffled split of a time series, so test rows sit between training
# rows; a chronological hold-out (shuffle=False) would better reflect forecasting — confirm intent.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
#Importing the models
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Candidate regressors keyed by the display name used when reporting scores
model = {}
model["Linear Regression"] = LinearRegression()
model["Random Forest"] = RandomForestRegressor()

# Helper that trains each candidate model and collects its test-set score
def fit_and_score(models, X_train, X_test, y_train, y_test):
    """
    Train every model in `models` and report how each scores on the test data.

    models : dict mapping a display name to an object with fit(X, y) and score(X, y)
    X_train : training data
    X_test : testing data
    y_train : labels associated with the training data
    y_test : labels associated with the test data

    Returns a dict mapping each name to the value returned by that model's
    score(X_test, y_test). The models are fitted in place.
    """

    def _train_then_score(estimator):
        # Fit on the training split, then score on the held-out split.
        estimator.fit(X_train, y_train)
        return estimator.score(X_test, y_test)

    # Seed NumPy's global RNG once, before any model is fitted, for reproducible results
    np.random.seed(42)
    return {label: _train_then_score(estimator) for label, estimator in models.items()}

In [None]:
# Fit every candidate in `model` on the training split and collect its test-set score.
model_scores = fit_and_score(model,X_train,X_test,y_train,y_test)

In [None]:
# Display the name -> score mapping returned by fit_and_score.
model_scores

In [None]:
# One-row table: a column per model, with the single row labelled 'r2_score'.
model_compare = pd.DataFrame([model_scores], index=['r2_score'])
model_compare

In [None]:
# Bar chart with one bar per model; tick labels are kept horizontal.
score_ax = model_compare.T.plot.bar(figsize=(4, 6), color='salmon', rot=0)
score_ax.set_xlabel('Model Name')
score_ax.set_ylabel('r^2 score');

Since the Random Forest Regressor achieves the higher r^2 score, we will use it as the final model.

In [None]:
# Re-seed NumPy's global RNG, then train the chosen random forest on the training split.
# Note that `model` is rebound here from the dict of candidates to the single fitted regressor.
np.random.seed(42)
model = RandomForestRegressor()
model.fit(X_train, y_train)

In [None]:
# Predict the closing price for the held-out test rows.
y_preds = model.predict(X_test)

In [None]:
#Model Evaluation
# r^2 score of the predictions against the actual test-set closing prices.
r2_score(y_test,y_preds)

In [None]:
# Mean squared error (sklearn's mean_squared_error, imported as `mse`) on the test set.
mse(y_test,y_preds)