## Notebook to debug my stock predictor

In [2]:
"""
The goal of this project is to use AI & ML to predict stock prices. I will use an LLM for sentiment analysis and time series analysis to predict stock prices.
"""

# Import Libraries

# Financial Data
import yfinance as yf
from finvizfinance.quote import finvizfinance

# Data Manipulation
import pandas as pd
import numpy as np

# Data Modeling Library
#from statsmodels.tsa.statespace.sarimax import SARIMAX

# Machine Learning Library
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Charts
#import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

# Interactive Web App UI
#import streamlit as st

# Connect to OpenAI API
from openai import OpenAI
from config import my_sk

# setup api client
# Build the shared OpenAI client from the key imported from the local `config` module;
# classify_sentiment() sends its chat-completion requests through this object.
client = OpenAI(api_key=my_sk)

In [9]:
# Load the Dataset
# Reads the CSV from a path relative to the notebook's working directory using
# pandas' default parsing (presumably the list of US tickers — confirm against the file).
us_tickers = pd.read_csv('data/us_tickers.csv')

In [17]:
# NOTE(review): this assignment fails. The recorded output for this cell is
# "ValueError: Length mismatch: Expected axis has 9 elements, new values have 2 elements",
# i.e. the loaded frame has 9 columns while only 2 names are supplied here.
# The two intended columns must be selected before renaming; which two they are
# cannot be determined from this notebook — TODO confirm against data/us_tickers.csv.
us_tickers.columns = ['symbol', 'company']


ValueError: Length mismatch: Expected axis has 9 elements, new values have 2 elements

# OpenAI API call using the LLM to classify text

In [None]:
def classify_sentiment(title):
    """Classify a news headline's sentiment with the OpenAI chat API.

    Args:
        title: Headline text to classify.

    Returns:
        The model's one-word reply, upper-cased, with surrounding whitespace,
        quotes and periods removed. The prompt asks for 'POSITIVE', 'NEGATIVE'
        or 'NEUTRAL'. Returns '' when the API response carries no text.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a sentiment classifier."},
            {"role": "user", "content": f"Classify the sentiment as 'POSITIVE' or 'NEGATIVE' or 'NEUTRAL' with just that one word only, no additional words or reasoning: {title}"},
        ],
        # Leave room for the whole word: a label is not guaranteed to be a single
        # token, so a one-token cap can return only the first fragment of it.
        max_tokens=5,
        n=1,
        # Classification should be repeatable across runs, so turn sampling
        # randomness off.
        temperature=0,
    )
    # `content` may be None; normalise to a string before cleaning it up.
    reply = response.choices[0].message.content or ""
    return reply.strip().strip("\"'.").upper()


Check to see if the sentiment analysis is working

In [None]:
# Smoke test: send one mixed-tone headline through classify_sentiment and
# show the returned label as the cell output.
sample_headline = "IBM had an amazing run but tanked at the end of the day"
classify_sentiment(sample_headline)

Load all US publicly traded companies into a dataframe

In [None]:
# Re-read the ticker CSV, this time treating every row as data (header=None) and
# supplying a single column name; this replaces the `us_tickers` loaded earlier.
# NOTE(review): the earlier error output reports 9 columns for this file under
# default parsing, while only one name is given here — verify that the resulting
# frame has the intended columns and index.
us_tickers = pd.read_csv('data/us_tickers.csv', header=None, names=['Company'])

-- Now we want to get the data

In [None]:
# Ask for the ticker interactively. Surrounding whitespace is removed so a stray
# space does not end up in the lookup, and an empty answer is rejected here
# instead of failing later inside the data-pull cells.
# NOTE(review): the prompt also invites a company name, but the value is passed
# straight to finvizfinance(ticker) below — confirm that names are accepted.
ticker = input("Enter a publically traded company name or stock ticker: ").strip().lower()
if not ticker:
    raise ValueError("No ticker entered; re-run this cell and type a stock ticker.")

Validate the user input

In [None]:
# Import Data using the APIs
# Data Pull
# Build a finvizfinance quote object for the entered ticker and fetch its news
# table; later cells read the 'Title' and 'Date' columns of `news_df`.
stock = finvizfinance(ticker) 
news_df = stock.ticker_news()



In [None]:
# Quick look at the raw news table: the first rows are printed as plain text,
# and describe() — being the last expression — is rendered as the cell output.
print(news_df.head())
news_df.describe()

In [None]:
# Preprocess before putting into LLM: lower-case the headlines.
titles_lower = news_df['Title'].str.lower()

# Classify Sentiment function applied to each row of news_df (one API call per
# headline), then normalise the label text.
labels = titles_lower.apply(classify_sentiment).str.strip().str.upper()

# Postprocess after putting into LLM.
# Keep only positive/negative headlines. Matching on the 3-letter prefix makes the
# filter work whether the classifier returns the full word ('POSITIVE') or a
# shortened label ('POS'); neutral, empty or unexpected labels are dropped.
keep = labels.str[:3].isin(['POS', 'NEG'])

# Build the result with assign() instead of writing columns onto a boolean-mask
# slice, which is what triggers pandas' SettingWithCopyWarning.
news_df = (
    news_df
    .assign(Title=titles_lower, sentiment=labels)
    .loc[keep]
    .assign(Date=lambda df: pd.to_datetime(df['Date']))
    .assign(DateOnly=lambda df: df['Date'].dt.date)
)

In [None]:
# Preview the processed news frame (first rows, plain text).
print(news_df.head())

-- Create the function to classify sentiment on the ticker

In [None]:
def label_news_sentiment(news):
    """Label each headline in a Finviz news table and keep the non-neutral ones.

    Args:
        news: DataFrame with at least 'Title' and 'Date' columns.

    Returns:
        A new DataFrame (the input is not modified) with 'Title' lower-cased,
        'Date' parsed to datetime, and added 'sentiment' and 'DateOnly' columns,
        restricted to rows whose label starts with 'POS' or 'NEG'.
    """
    # Preprocess before putting into LLM
    titles_lower = news['Title'].str.lower()

    # Classify Sentiment function applied to each headline (one API call per row)
    labels = titles_lower.apply(classify_sentiment).str.strip().str.upper()

    # Postprocess after putting into LLM.
    # Prefix match works for both full-word ('POSITIVE') and shortened ('POS')
    # labels; neutral, empty or unexpected labels are dropped.
    keep = labels.str[:3].isin(['POS', 'NEG'])

    # assign() returns new frames, so nothing is written onto a filtered slice
    # (the cause of pandas' SettingWithCopyWarning).
    return (
        news
        .assign(Title=titles_lower, sentiment=labels)
        .loc[keep]
        .assign(Date=lambda df: pd.to_datetime(df['Date']))
        .assign(DateOnly=lambda df: df['Date'].dt.date)
    )


# Data Pull
#ticker = 'AAPL'
stock = finvizfinance(ticker)
news_df = label_news_sentiment(stock.ticker_news())


In [None]:
# Preview the labelled headlines and the label distribution.
# value_counts() is printed explicitly: a bare expression in the middle of a cell
# is evaluated and then discarded — only the cell's last expression is rendered.
print(news_df.head())
print(news_df['sentiment'].value_counts())
news_df 

In [None]:
# Function to group and process sentiment data

# Reduce each label to its 3-letter prefix so that both full words ('POSITIVE',
# 'NEGATIVE') and short labels ('POS', 'NEG') land in the 'POS'/'NEG' columns
# selected below. `news_df` itself is left untouched.
sentiment_key = news_df['sentiment'].astype(str).str.upper().str[:3]

# Reshape data to have df with columns: Date, # of positive Articles, # of negative Articles
grouped = news_df.groupby(['DateOnly', sentiment_key]).size().unstack(fill_value=0)
grouped = grouped.reindex(columns=['POS', 'NEG'], fill_value=0)

# Rolling article counts. Despite the "avg" in the column names these are sums,
# and the window is the last 7 rows — i.e. 7 dates that appear in `news_df`,
# which is not necessarily 7 calendar days.
grouped['7day_avg_positive'] = grouped['POS'].rolling(window=7, min_periods=1).sum()
grouped['7day_avg_negative'] = grouped['NEG'].rolling(window=7, min_periods=1).sum()

# Create "Percent Positive" from the rolling totals, so the column matches its
# "7day" name (the daily counts alone would give a per-day share).
# A window with no positive or negative articles yields NaN.
rolling_total = grouped['7day_avg_positive'] + grouped['7day_avg_negative']
grouped['7day_pct_positive'] = grouped['7day_avg_positive'] / rolling_total
result_df = grouped.reset_index()

print(result_df.head())
result_df

In [None]:


# Line chart for historical and forecasted stock prices.
# This chart needs `stock_data` (with a 'DateOnly' column) and `forecast`. No
# earlier cell visible in this notebook creates them, so check explicitly: a
# missing input then skips this one chart instead of raising and blocking the
# sentiment charts below.
price_chart_ready = (
    'stock_data' in globals()
    and 'forecast' in globals()
    and 'DateOnly' in stock_data.columns
)
if price_chart_ready:
    plt.figure(figsize=(14, 7))
    plt.plot(stock_data['DateOnly'], stock_data['Close'], label='Historical Prices')
    plt.plot(forecast.index, forecast.predicted_mean, label='Forecasted Prices', linestyle='--')
    plt.xlabel('Date')
    plt.ylabel('Stock Price')
    plt.title('Historical and Forecasted Stock Prices')
    plt.legend()
    plt.show()
else:
    print("Skipping price chart: `stock_data` (with a 'DateOnly' column) and `forecast` must be created first.")

# Bar chart for sentiment counts
sentiment_counts = news_df['sentiment'].value_counts()
plt.figure(figsize=(10, 5))
sns.barplot(x=sentiment_counts.index, y=sentiment_counts.values)
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.title('Sentiment Counts')
plt.show()

# Rolling chart for positive and negative sentiment (columns from result_df)
plt.figure(figsize=(14, 7))
plt.plot(result_df['DateOnly'], result_df['7day_avg_positive'], label='7-day Avg Positive Sentiment')
plt.plot(result_df['DateOnly'], result_df['7day_avg_negative'], label='7-day Avg Negative Sentiment')
plt.xlabel('Date')
plt.ylabel('Sentiment Count')
plt.title('7-day Rolling Average of Sentiment')
plt.legend()
plt.show()

# Percentage positive sentiment chart
plt.figure(figsize=(14, 7))
plt.plot(result_df['DateOnly'], result_df['7day_pct_positive'], label='7-day % Positive Sentiment')
plt.xlabel('Date')
plt.ylabel('Percentage')
plt.title('7-day Percentage of Positive Sentiment')
plt.legend()
plt.show()

In [None]:
# Text dump of the aggregated sentiment table.
print(result_df)

# Stacked bar chart of the two rolling sentiment columns, one bar per date.
result_df.plot(
    kind='bar',
    stacked=True,
    x='DateOnly',
    y=['7day_avg_positive', '7day_avg_negative'],
    figsize=(10, 5),
)
plt.show()

In [None]:
import yfinance as yf

# Hard-coded inputs for the price download.
# NOTE: this overwrites the `ticker` entered interactively earlier in the notebook.
ticker = 'AAPL'
start_date = '2024-11-21'
end_date = '2024-11-23'

stock_data = yf.download(ticker, start=start_date, end=end_date, ignore_tz=True)

# Handle the "nothing came back" case first; otherwise add the row-over-row
# percentage change of the close and print the frame.
if stock_data is None or stock_data.empty:
    print("No data was downloaded.")
else:
    stock_data['Pct_Change'] = stock_data['Close'].pct_change().mul(100)
    print(stock_data)  # Display the downloaded data

In [None]:
# Quick look at the downloaded prices: first rows as plain text, then summary
# statistics rendered as the cell output (describe() is the last expression).
print(stock_data.head())
stock_data.describe()