In [None]:
pip install pandas scikit-learn bokeh ipywidgets prettytable numpy plt

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np
from bokeh.models import ColumnDataSource, Label
from bokeh.plotting import figure, output_notebook, show
from bokeh.models.tools import HoverTool
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display
from prettytable import PrettyTable
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt



In [44]:
# Load each company's price history; the dict key is the display name used in the UI.
stocks_data = {
    'Google': pd.read_csv('./csv/google.csv'),
    'Apple': pd.read_csv('./csv/apple.csv'),
    'Amazon': pd.read_csv('./csv/amazon.csv'),
    'Meta': pd.read_csv('./csv/meta.csv'),
    'Netflix': pd.read_csv('./csv/netflix.csv'),
}

# Per-company aliases that refer to the very same DataFrame objects held in the dict.
google_data = stocks_data['Google']
apple_data = stocks_data['Apple']
amazon_data = stocks_data['Amazon']
meta_data = stocks_data['Meta']
netflix_data = stocks_data['Netflix']

# Number of rows averaged into the moving average of the closing price.
window_size = 20

for stock in stocks_data:
    data = stocks_data[stock]

    # Replace missing values with the mean of their own column ('Date' is excluded from the means).
    column_means = data.drop(columns=['Date']).mean()
    data.fillna(column_means, inplace=True)

    # Rolling mean of Close; the first (window_size - 1) rows have no full window and come out as NaN.
    data['MA_Close'] = data['Close'].rolling(window=window_size).mean()

    # Drop every row that still holds a NaN, which includes those leading rows.
    data.dropna(inplace=True)


In [50]:
# Function to train and evaluate linear regression model
def train_linear_regression(data, feature_columns=None, target_column='Close',
                            test_size=0.2, random_state=42):
    """Fit a linear regression on one stock's data and report its hold-out RMSE.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the feature columns and the target column.
    feature_columns : sequence of str, optional
        Columns used as model inputs. Defaults to
        ['Open', 'High', 'Low', 'Volume', 'MA_Close'].
    target_column : str, default 'Close'
        Column the model learns to predict.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(model, rmse)``: the fitted ``LinearRegression`` and the root mean
        squared error measured on the held-out rows.
    """
    if feature_columns is None:
        feature_columns = ['Open', 'High', 'Low', 'Volume', 'MA_Close']

    X = data[list(feature_columns)]
    y = data[target_column]

    # NOTE: train_test_split shuffles rows by default, so the held-out rows are
    # interleaved in time with the training rows rather than being a later period.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate the model on the rows it was not fitted on
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    return model, rmse


# Fit one model per stock, keep it with its hold-out RMSE, and store the
# model's predictions for every row of that stock in a 'Predicted' column.
models = {}
for stock, data in stocks_data.items():
    model, rmse = train_linear_regression(data)
    models[stock] = {'model': model, 'rmse': rmse}

    # `data` is the same DataFrame object held in stocks_data, so this adds the column in place.
    data['Predicted'] = model.predict(data[['Open', 'High', 'Low', 'Volume', 'MA_Close']])

    # R2 here is computed over all rows of the stock, including those used for fitting.
    score = r2_score(data["Close"], data["Predicted"])
    print(f'R-squared (R2) score for {stock}: {score:.15f}')


R-squared (R2) score for Google: 0.999856210132403
R-squared (R2) score for Apple: 0.999889589092337
R-squared (R2) score for Amazon: 0.999845946256504
R-squared (R2) score for Meta: 0.999759190851619
R-squared (R2) score for Netflix: 0.999763343405629


In [54]:
# Build a one-row PrettyTable summarising a single prediction
def format_pretty_table(selected_stock, user_date, user_prediction, close_price, user_data):
    """Return a PrettyTable with one row describing the prediction for a date.

    ``user_data`` is a nested sequence whose first element holds the feature
    values; positions 0, 1, 2 and 4 are shown as Open, High, Low and 20DMA
    (position 3 is not displayed). All prices are rendered with two decimals
    and the date as YYYY-MM-DD.
    """
    features = user_data[0]

    def as_money(value):
        return f"{value:.2f}"

    row = [selected_stock, user_date.strftime('%Y-%m-%d')]
    for amount in (user_prediction, close_price, features[0], features[1], features[2], features[4]):
        row.append(as_money(amount))

    table = PrettyTable()
    table.field_names = [
        "Stock",
        "Date",
        "Predicted Price (USD)",
        "Close (USD)",
        "Open (USD)",
        "High (USD)",
        "Low (USD)",
        "20DMA (USD)",
    ]
    table.add_row(row)
    return table

# Function to predict future close price using linear regression model
def predict_future_close_price(lin_model, days_to_predict, user_date, stocks_data, selected_stock):
    """Extrapolate the trend line ``days_to_predict`` rows past ``user_date``.

    Parameters
    ----------
    lin_model : fitted linear model
        Must expose ``intercept_`` and ``coef_``; it is expected to have been
        fitted on a single feature holding zero-based row numbers (0, 1, 2, ...).
    days_to_predict : int
        How many rows (days of data) beyond ``user_date`` to project.
    user_date : datetime-like
        Reference date, compared against the 'Date' column.
    stocks_data : dict
        Maps stock name to its DataFrame with a 'Date' column.
    selected_stock : str
        Key of the stock to use in ``stocks_data``.

    Returns
    -------
    The value of the fitted line at the projected row number.

    Notes
    -----
    Assumes rows are in ascending date order, so the row for ``user_date`` has
    zero-based number ``(rows with Date <= user_date) - 1``.
    """
    history = stocks_data[selected_stock]
    rows_up_to_date = int((history['Date'] <= user_date).sum())

    # The model's row numbers start at 0, so the selected row's number is count - 1;
    # using the bare count would shift every forecast one row too far ahead.
    future_row_number = (rows_up_to_date - 1) + days_to_predict

    predicted_price = lin_model.intercept_ + lin_model.coef_[0] * future_row_number
    return predicted_price
    

In [55]:
# Send Bokeh plots to the notebook instead of a standalone HTML file.
output_notebook()

# --- Input controls -------------------------------------------------------
stock_names = list(stocks_data.keys())

# Stock selector; the first loaded stock is preselected.
stock_dropdown = widgets.Dropdown(options=stock_names, value=stock_names[0], description='Stock:')

# Reference date for the prediction; stays None until the user picks one.
date_picker = widgets.DatePicker(description='Pick a Date')

# Trigger button and the area that receives the table, message and charts.
button = widgets.Button(description="Get Predicted Price")
output = widgets.Output()

# Forecast horizon, limited to 1..365.
days_input = widgets.BoundedIntText(
    value=1,
    min=1,
    max=365,
    step=1,
    description='Days to Predict:',
    style={'description_width': 'initial'},
)


# Feature columns, in the order the per-stock regression models were fitted on.
_FEATURE_COLUMNS = ['Open', 'High', 'Low', 'Volume', 'MA_Close']

# Candle body width on the datetime axis: half a day, in milliseconds.
_CANDLE_WIDTH_MS = 12 * 60 * 60 * 1000


def _build_candlestick_chart(data, selected_stock):
    """Build the candlestick chart with the model's per-row predictions overlaid."""
    rising = data.Close > data.Open
    falling = data.Open > data.Close

    chart = figure(x_axis_type="datetime", width=900, height=500, title=f'{selected_stock} Stock Price Prediction')
    chart.segment(data['Date'], data.High, data['Date'], data.Low, color="black")
    chart.vbar(data['Date'][rising], _CANDLE_WIDTH_MS, data.Open[rising], data.Close[rising],
               fill_color="lawngreen", line_color="lime", legend_label='Green Day')
    chart.vbar(data['Date'][falling], _CANDLE_WIDTH_MS, data.Open[falling], data.Close[falling],
               fill_color="tomato", line_color="red", legend_label='Red Day')
    chart.scatter(x='Date', y='Predicted', source=ColumnDataSource(data), size=5, color='blue', legend_label='Predicted')
    chart.yaxis.axis_label = "Price ($)"
    chart.xaxis.axis_label = "Date"
    return chart


def _build_moving_average_chart(data, selected_stock):
    """Build the line chart comparing the closing price with its moving average."""
    chart = figure(x_axis_type="datetime", width=900, height=500, title=f'{selected_stock} Moving Average vs Actual Price Over Time')
    chart.line(data['Date'], data['Close'], color='blue', legend_label='Actual Price')
    chart.line(data['Date'], data['MA_Close'], color='orange', legend_label='Moving Average')
    hover = HoverTool(
        tooltips=[
            ("Date", "@x{%F}"),
            ("Price", "@y{0.2f}")
        ],
        formatters={
            "@x": "datetime",
        },
        mode='vline'
    )
    chart.add_tools(hover)
    chart.yaxis.axis_label = "Price ($)"
    chart.xaxis.axis_label = "Date"
    return chart


def _build_error_histogram(data, selected_stock):
    """Build a 50-bin histogram of (Predicted - Close) over all rows."""
    errors = data['Predicted'] - data['Close']
    hist, edges = np.histogram(errors, bins=50)
    chart = figure(title=f'{selected_stock} Distribution of Prediction Errors', x_axis_label='Prediction Error', y_axis_label='Frequency')
    chart.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], fill_color='purple', line_color='black')
    return chart


def _fit_trend_model(data):
    """Fit Close against the zero-based row number and plot the fit with matplotlib."""
    # set_index returns a new frame, so the helper columns below do not touch `data`.
    trend_data = data.set_index(pd.DatetimeIndex(data['Date'].values))
    trend_data['Numbers'] = np.arange(len(trend_data))
    X = trend_data[['Numbers']].to_numpy()
    y = trend_data['Close'].values
    lin_model = LinearRegression().fit(X, y)
    trend_data['Pred'] = lin_model.predict(X)
    ax = trend_data.plot(x='Date', y=['Close', 'Pred'])
    ax.set_title('Close Price History')
    return lin_model


# Function to get user input and display predicted price
def get_predicted_price(button):
    """Button callback: show the prediction table, trend forecast and charts.

    Reads the selected stock, date and horizon from the module-level widgets,
    and writes everything into the module-level ``output`` widget. If no date
    is picked, or the date has no row in the selected stock's data, a message
    is printed instead and nothing else is drawn.

    Parameters
    ----------
    button : the widget passed in by ``button.on_click``; not used here.

    Side effects
    ------------
    Converts the selected stock's 'Date' column to datetime and refreshes its
    'Predicted' column in ``stocks_data``.
    """
    # Clear the output widget for each new prediction
    output.clear_output()
    selected_stock = stock_dropdown.value
    user_date = date_picker.value

    if user_date is None:
        with output:
            print("Please select a date.")
        return

    user_date = pd.Timestamp(user_date)
    data = stocks_data[selected_stock]

    # Parse the CSV's date strings once; on later clicks the column is already
    # datetime and this is expected to leave it unchanged.
    data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%y')

    # Find the row for the selected date
    selected_rows = data[data['Date'] == user_date]
    if selected_rows.empty:
        with output:
            print(f"Data for the selected date ({user_date.strftime('%Y-%m-%d')}) is not available.")
        return

    model = models[selected_stock]['model']

    # Nested list of the feature values, in the shape format_pretty_table expects.
    user_data = [[selected_rows[col].values[0] for col in _FEATURE_COLUMNS]]

    # Read Close from the matched row rather than re-matching the date as a string.
    close_price = selected_rows['Close'].values[0]

    # Predict from a DataFrame slice so the input keeps the column names the model was fitted with.
    user_prediction = model.predict(selected_rows[_FEATURE_COLUMNS].iloc[[0]])[0]
    data['Predicted'] = model.predict(data[_FEATURE_COLUMNS])

    # Price Prediction Detail Chart
    p = _build_candlestick_chart(data, selected_stock)

    # Moving Average vs Actual Price Over Time Chart
    p2 = _build_moving_average_chart(data, selected_stock)

    # Prediction Errors Chart
    p3 = _build_error_histogram(data, selected_stock)

    # Fit the straight-line trend used for the multi-day forecast (also draws the matplotlib figure)
    lin_model = _fit_trend_model(data)

    # Build the table and the forecast
    days_to_predict = days_input.value
    x = format_pretty_table(selected_stock, user_date, user_prediction, close_price, user_data)
    predicted_price = predict_future_close_price(lin_model, days_to_predict, user_date, stocks_data, selected_stock)

    with output:
        print(x)
        print(f"Predicted Price for {days_to_predict} day(s) after {user_date.strftime('%Y-%m-%d')}: {predicted_price}")
        plt.show()
        show(p, notebook_handle=True)
        show(p2, notebook_handle=True)
        show(p3, notebook_handle=True)

# Run get_predicted_price each time the button is clicked.
button.on_click(get_predicted_price)

# Show the controls in order, with the output area last.
controls = (stock_dropdown, date_picker, days_input, button, output)
display(*controls)

Dropdown(description='Stock:', options=('Google', 'Apple', 'Amazon', 'Meta', 'Netflix'), value='Google')

DatePicker(value=None, description='Pick a Date', step=1)

BoundedIntText(value=1, description='Days to Predict:', max=365, min=1, style=DescriptionStyle(description_wid…

Button(description='Get Predicted Price', style=ButtonStyle())

Output()