In [None]:
from IPython.display import display, HTML

<img src="https://webassets.unt.edu/assets/branding/unt-stacked-logo.svg" alt="UNT | University of North Texas" class="desktop-logo" width="300" height="500">


<div style="display: flex; justify-content: space-around; padding: 20px 40px 20px 20px; background-color: #f4f4f9; border-radius: 10px;">
  <!-- Team Member 1 -->
  <div style="text-align: center; background-color: #ffffff; border-radius: 10px; padding: 20px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); width: 150px;">
    <img src="https://via.placeholder.com/100" style="border-radius: 50%; width: 100px; height: 100px;">
    <h3 style="font-family: Arial, sans-serif; color: #333;">Sonali Sabnam</h3>
    <p style="font-family: Arial, sans-serif; color: #666;">Da Boss</p>
  </div>
  
  <!-- Team Member 2 -->
  <div style="text-align: center; background-color: #ffffff; border-radius: 10px; padding: 20px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); width: 150px;">
    <img src="https://via.placeholder.com/100" style="border-radius: 50%; width: 100px; height: 100px;">
    <h3 style="font-family: Arial, sans-serif; color: #333;">Sonam Pohuja</h3>
    <p style="font-family: Arial, sans-serif; color: #666;">Da Other Boss</p>
  </div>

  <!-- Team Member 3 -->
  <div style="text-align: center; background-color: #ffffff; border-radius: 10px; padding: 20px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); width: 150px;">
    <img src="https://via.placeholder.com/100" style="border-radius: 50%; width: 100px; height: 100px;">
    <h3 style="font-family: Arial, sans-serif; color: #333;">Luis Garcia Fuentes</h3>
    <p style="font-family: Arial, sans-serif; color: #666;">Da Cool Guy</p>
  </div>

  <!-- Team Member 4 -->
  <div style="text-align: center; background-color: #ffffff; border-radius: 10px; padding: 20px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); width: 150px;">
    <img src="https://via.placeholder.com/100" style="border-radius: 50%; width: 100px; height: 100px;">
    <h3 style="font-family: Arial, sans-serif; color: #333;">Young Yu</h3>
    <p style="font-family: Arial, sans-serif; color: #666;">Da Janitor</p>
  </div>
</div>

# Back to the Futures: The AI Stock Predictor
Back to the Futures is a project where we try to do the impossible—predict the stock market with AI! <br>
Because if there’s one thing the stock market loves, it’s being perfectly predictable.  <br>
(spoiler: it’s not). <br>
- Our goal? To use machine learning to turn volatility into victory.
- Will it work? Well, let’s just say our fallback plan involves a lot of ramen.

In [None]:
# Add your own imports here
import os
import pandas as pd
from transformers import pipeline
from dotenv import load_dotenv


# Do Not Edit Below the lines
#-------------------------------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------------------------------
from utilities.api import *
from utilities.classify import *
from utilities.common import *

In [None]:
'''
Secrets are loaded from a local .env file (or the process environment).
Do not paste keys directly into this notebook.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Load variables from a .env file into the process environment, then read the
# news API key from it so the key never has to live in the notebook itself.
load_dotenv()
api_key = os.getenv('API_KEY')
if not api_key:
    # Fail here with an actionable message instead of passing None to get_news().
    raise RuntimeError(
        "API_KEY is not set. Add API_KEY=<your key> to a .env file "
        "or export it in your environment before running this notebook."
    )

In [None]:
'''
We're just setting the GPU here, if you don't have one... no worries, I don't either.
It will fall back to your CPU :)
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# set_device comes from one of the `utilities` star-imports; a GPU is requested here.
# NOTE(review): per the cell note it presumably falls back to CPU when no GPU is available — confirm in utilities.
device = set_device(use_gpu=True)
# Show which device was selected; the same `device` is handed to both model pipelines below.
print(f"Using {device}")

In [None]:
'''
This is where we define the pre-trained model used to pick out financial news.
The one below is just a placeholder.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# TODO: replace this off-the-shelf checkpoint with a model trained for the project.
# Zero-shot classifier that is later handed to is_financial_article().
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device,
)

In [None]:
'''
This is where we define the pre-trained sentiment model. The one below is just a placeholder.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# TODO: replace this off-the-shelf checkpoint with a model trained for the project.
# Sentiment pipeline that is later handed to article_sentiment().
sentiment_classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=device,
)

In [None]:
'''
We need to pick a company in the S&P 500 to train our model on.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Evaluation window as ISO dates (YYYY-MM-DD); the same pair is passed to both
# get_news() and get_stocks() further down.
eval_start, eval_stop = '2024-09-11', '2024-09-15'

# Company name (passed to get_news) and its ticker symbol (passed to get_stocks).
company, ticker = 'Tesla', 'TSLA'

In [None]:
'''
This function gets the news. There are limitations. Check out the README.md
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Ask the news helper for `company` over the eval_start..eval_stop window.
# NOTE(review): each article is presumably a dict with 'title', 'publishedAt' and
# 'content' keys (those are the keys read later in this notebook) — confirm in utilities.
articles = get_news(api_key, company, eval_start, eval_stop)
# Debug aid: uncomment to inspect the raw articles.
# for article in articles:
#     print(f"Title: {article['title']}")
#     print(f"Published: {article['publishedAt']}")
#     print(f"Content: {article['content']}")

In [None]:
'''
This is where we call the classifier model to filter out the financial news from the garbage.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Keep only the articles the zero-shot classifier accepts as financial news.
# Despite the name, the result is a plain list of articles, not a DataFrame.
filtered_df = [
    candidate
    for candidate in articles
    if is_financial_article(candidate, classifier)
]

In [None]:
'''
We'll take the output of the classifier above and classify it as Positive, Neutral, or Negative.
In other words, 1, 0, or -1. This also gives a confidence score that we'll use as a weight to multiply
against the sentiment. For example, -1 (bad news) * 0.97 (this is how confident the sentiment model is that it is bad news).
So, we get a -0.97 final score.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Score only the articles that passed the financial-news filter. Iterating over the
# raw `articles` list (as before) ignored `filtered_df` entirely, so non-financial
# stories ended up in the sentiment scores.
data = []
for article in filtered_df:
    sentiment = article_sentiment(article, sentiment_classifier)
    data.append({
        'date': article.get('publishedAt', ''),
        'title': article['title'],
        'content': article.get('content', ''),
        'sentiment_score': sentiment,
    })

# Convert to DataFrame. Passing `columns` keeps the schema stable even when no
# article survives the filter, so the later date formatting and merge still find
# the columns they expect.
sentiment_df = pd.DataFrame(
    data,
    columns=['date', 'title', 'content', 'sentiment_score'],
)

# Debug aid: uncomment to print the per-article scores.
# for _, row in sentiment_df.iterrows():
#     print(f"Article Title: {row['title']}")
#     print(f"Weighted Sentiment: {row['sentiment_score']}")

In [None]:
'''
Here we use our trusty yfinance module to get stocks. The variables are already defined above.
Notice we are pulling the same date ranges as the news articles.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Request price data for `ticker` over the same eval_start..eval_stop window used for the news query.
# NOTE(review): the aggregation step later expects Open/High/Low/Close/Adj Close/Volume
# columns — confirm get_stocks returns them.
stock_df = get_stocks(ticker, eval_start, eval_stop)

In [None]:
'''
We are going to join the two dataframes on "date" column. In order to do that,
we need the datetime formats to match. Basically, it converts the column names in
the dataframes to match, reformats the datetime, then trims off the time component.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Run both frames through the same helper so they can be joined on 'date' in the next cell.
# NOTE(review): format_date presumably returns a frame with a date-only 'date' column — confirm in utilities.
df_stocks = format_date(stock_df)
df_sentiment = format_date(sentiment_df)

In [None]:
'''
Okay, here we merge everything on date. The fillna is there just in case one of the APIs
returns a null.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Outer join keeps every date that appears in either frame; every missing value in
# the joined result is then replaced with 0.
# NOTE(review): that 0 also lands in price columns for dates without stock rows and in
# sentiment_score for dates without news — confirm this is intended before modelling.
merged_data = (
    pd.merge(left=df_stocks, right=df_sentiment, how='outer', on='date')
    .fillna(0)
)

In [None]:
'''
Here we drop some columns we don't plan on using. Regression models aren't fans of non-numerical data.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Drop the free-text columns that will not be used downstream.
# Rebinding the result (instead of inplace=True) together with errors='ignore' makes
# this cell safe to re-run: the in-place version raised KeyError on a second execution
# because the columns were already gone. `axis=1` was redundant next to `columns=`.
merged_data = merged_data.drop(columns=['title', 'content'], errors='ignore')

In [None]:
'''
This is defining a dictionary structure that I'll pass to a function. On some days, you'll get several news articles.
Our prediction is based on the aggregation of all financial news per day.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Per-column aggregation rules handed to aggregate_column().
# A dict literal cannot hold the same key twice: the earlier
# `'sentiment_score': 'mean'` entry was silently overwritten by
# `'sentiment_score': 'count'`, so the daily mean sentiment was never computed.
# Listing both functions under one key keeps both results.
# NOTE(review): with a list value, pandas' groupby().agg() returns two-level column
# labels such as ('sentiment_score', 'mean') — confirm aggregate_column and any
# downstream code handle that.
agg_columns = {
    'Open': 'mean',
    'High': 'mean',
    'Low': 'mean',
    'Close': 'mean',
    'Adj Close': 'mean',
    'Volume': 'mean',
    'sentiment_score': ['mean', 'count'],
}

In [None]:
'''
We pass the dict along with the dataframe to the function. Your output should be 1 row per day.
'''

#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Collapse merged_data using the per-column rules defined in agg_columns.
# NOTE(review): the helper presumably groups by 'date' to give one row per day — confirm in utilities.
aggregated_df = aggregate_column(merged_data, agg_columns)

# Work in Progress

In [None]:

# # Convert stock data to DataFrame
# stock_df = pd.DataFrame(stock_data)

# # Convert article sentiment data to DataFrame
# sentiment_df = pd.DataFrame(article_sentiment_data)
# sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])

# # Aggregate sentiment by date (e.g., take the average sentiment per day)
# aggregated_sentiment = sentiment_df.groupby('date')['sentiment'].mean().reset_index()

# # Merge stock data with aggregated sentiment data
# df = pd.merge(stock_df, aggregated_sentiment, on='date', how='left')

# # Fill in article_count to ensure it matches
# df['article_count'] = df['article_count'].fillna(0)

# # Add cumulative factor for days without news
# df['cumulative_sentiment'] = df['sentiment']
# days_since_news = 0

# for i in range(1, len(df)):
#     if np.isnan(df.loc[i, 'sentiment']):
#         # No news, carry forward the last sentiment and increase its weight
#         df.loc[i, 'cumulative_sentiment'] = df.loc[i-1, 'cumulative_sentiment'] * (1 + days_since_news)
#         days_since_news += 1
#     else:
#         # Reset days since news if there's new news
#         days_since_news = 0

In [None]:

# # a simple LSTM model
# class StockPredictor(nn.Module):
#     def __init__(self):
#         super(StockPredictor, self).__init__()
#         self.lstm = nn.LSTM(input_size=2, hidden_size=64, batch_first=True)
#         self.fc = nn.Linear(64, 1)

#     def forward(self, x):
#         lstm_out, _ = self.lstm(x)
#         output = self.fc(lstm_out[:, -1, :])  # Predict using the last time step's output
#         return output

# # Instantiate and train model 
# # need to decide on an optimizer and loss function
# model = StockPredictor()


https://pypi.org/project/transformers/