## Importing Dependencies

In [1]:
import psycopg2
import os
import pandas as pd
import torch
from transformers import BertForSequenceClassification, BertTokenizer
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor

## Connecting to Supabase

In [2]:
def get_connection():
    """Open a PostgreSQL connection configured from environment variables.

    The settings are read from DB_NAME, DB_USER, DB_PASSWORD, DB_HOST and DB_PORT.

    Returns:
        The psycopg2 connection on success, or None when psycopg2 raises
        OperationalError (the error is printed instead of propagated).
    """
    settings = {
        'dbname': os.getenv('DB_NAME'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'host': os.getenv('DB_HOST'),
        'port': os.getenv('DB_PORT'),
    }

    try:
        connection = psycopg2.connect(**settings)
    except psycopg2.OperationalError as e:
        # Report the failure and signal it to the caller with None.
        print(f'Error: {e}')
        return None

    print('Connected Established!')
    return connection

In [3]:
# Open the database connection shared by the cells below.
# get_connection() returns None when the connection attempt fails, so a failed
# connect only surfaces later, at the first `conn.cursor()` call.
conn = get_connection()

Connected Established!


## Extracting headlines

In [4]:
def extract_headlines(connection=None):
    """Load every row of the ``headlines`` table into a DataFrame.

    Args:
        connection: Optional DB-API connection to query. Defaults to the
            notebook-level ``conn``, so existing zero-argument calls keep working.

    Returns:
        pandas.DataFrame with one row per fetched record; column names are
        taken from the cursor description.
    """
    if connection is None:
        connection = conn

    with connection.cursor() as curr:
        curr.execute("SELECT * FROM headlines")
        rows = curr.fetchall()
        # The first field of each description entry is the column name.
        columns = [column[0] for column in curr.description]

    return pd.DataFrame(rows, columns=columns)


In [5]:
# Pull the full `headlines` table into a DataFrame.
headlines = extract_headlines()

In [6]:
# Preview the first 10 rows.
headlines.head(10)

Unnamed: 0,publication_timestamp,ticker,headline,is_daily,is_weekly,is_monthly,sentiment
0,2024-03-14 07:06:00,HES,Hess Corp's Dividend Analysis,False,False,True,neutral
1,2024-04-04 09:10:00,PEP,PepsiCo,False,True,True,neutral
2,2024-03-19 19:24:00,ROL,"Analyst Report: Rollins, Inc.",False,False,True,neutral
3,2024-03-19 17:47:00,WELL,Analyst Report: Welltower Inc.,False,False,True,neutral
4,2024-04-08 07:02:00,ORCL,Oracle Corp's Dividend Analysis,False,True,True,neutral
5,2024-04-03 20:53:00,COST,Costco launches weight-loss program for members,False,False,True,neutral
6,2024-03-28 13:00:00,BLK,BlackRock CEO Larry Fink Expresses Serious Con...,False,False,True,neutral
7,2024-03-28 11:54:00,BLK,BlackRock CEO Larry Fink tells a 'tale of two ...,False,False,True,neutral
8,2024-03-28 10:24:00,BLK,BlackRock's (BLK) Restructuring Efforts Aid Am...,False,False,True,neutral
9,2024-03-28 09:12:00,BLK,UPDATE 1-US cryptocurrency ETF inflows pick up...,False,False,True,positive


## Loading model and tokenizer

In [7]:
# Load the tokenizer and the fine-tuned classifier from the local `models` directory.
# The paths are built with os.path.join so they use the running platform's separator;
# the previous hard-coded backslashes only form a directory path on Windows.
tokenizer = BertTokenizer.from_pretrained(os.path.join('models', 'tokenizer'))
model = BertForSequenceClassification.from_pretrained(os.path.join('models', 'finetuned_finbert'))

In [8]:
# Headline text is the only model input.
X = headlines['headline']
# Tokenize all headlines in a single call, with truncation and padding enabled.
encodings = tokenizer(X.tolist(), truncation=True, padding=True)

## Preparing dataset

In [9]:
class Dataset(torch.utils.data.Dataset):
    """Expose tokenizer encodings as an indexable collection of tensor dicts."""

    def __init__(self, encodings):
        # Mapping of field name -> per-example sequence of values.
        self.encodings = encodings

    def __len__(self):
        # The number of examples is taken from the `input_ids` field.
        return len(self.encodings['input_ids'])

    def __getitem__(self, idx):
        # Build one sample by picking entry `idx` from every field.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        return sample

In [10]:
# Wrap the tokenizer output so it can be fed to a DataLoader.
dataset = Dataset(encodings)

## Predicting sentiments

In [None]:
def analyze_sentiment(dataset, batch_size=8):
    """Classify every item of ``dataset`` with the notebook-level ``model``.

    Args:
        dataset: Indexable dataset whose items are dicts containing
            ``input_ids`` and ``attention_mask`` tensors.
        batch_size: Number of items per forward pass (default 8).

    Returns:
        Tuple ``(predictions, probabilities)``: a list with the predicted class
        index per item, and a list with the per-class softmax probabilities
        per item, both in dataset order.
    """
    model.eval()  # switch the model to evaluation mode before inference
    # Run on whichever device the model's parameters live on.
    device = next(model.parameters()).device
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    predictions = []
    probabilities = []

    # A single no_grad context covers the whole loop; nesting a second one adds nothing.
    with torch.no_grad():
        for batch in tqdm(dataloader, desc='Sentiment Analysis', unit="batch"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            logits = model(input_ids, attention_mask=attention_mask).logits
            probabilities_batch = torch.softmax(logits, dim=1)
            predicted_labels_batch = torch.argmax(probabilities_batch, dim=1)

            predictions.extend(predicted_labels_batch.tolist())
            probabilities.extend(probabilities_batch.tolist())

    return predictions, probabilities

In [None]:
# Run inference over the whole dataset: `predicted` holds class indices,
# `probabilities` the per-class softmax scores.
predicted, probabilities = analyze_sentiment(dataset)

In [11]:
# Restore `predicted` from IPython's %store storage instead of recomputing it.
# NOTE(review): this overwrites any `predicted` produced by the inference cell and
# relies on an earlier `%store predicted` that is not shown in this notebook —
# confirm the stored values still line up with the current `headlines` rows.
%store -r predicted

In [12]:
# Translate class indices into sentiment names.
# NOTE(review): assumes the fine-tuned model orders its classes as
# 0=negative, 1=neutral, 2=positive — confirm against the model's label config.
label_map = {0: 'negative', 1: 'neutral', 2: 'positive'}
predicted_sentiments = [label_map[label] for label in predicted]
# Pair each headline with its prediction; this relies on `predicted` being in
# the same order (and of the same length) as `X`.
data = {'headline': X.tolist(), 'sentiment': predicted_sentiments}
results = pd.DataFrame(data)

In [13]:
# Display the headline/sentiment pairs.
results

Unnamed: 0,headline,sentiment
0,Hess Corp's Dividend Analysis,neutral
1,PepsiCo,neutral
2,"Analyst Report: Rollins, Inc.",neutral
3,Analyst Report: Welltower Inc.,neutral
4,Oracle Corp's Dividend Analysis,neutral
...,...,...
14339,IBM stock nears an all-time highand it may hav...,neutral
14340,12 Best Alternatives to a 4-Year College,neutral
14341,Forget Meta Platforms: 2 Artificial Intelligen...,neutral
14342,IBM Eyes First Record Since 2013 as AI Optimis...,positive


## Updating sentiments for individual headlines 

In [None]:
def update_headlines(rows, connection=None):
    """Write the sentiment of each row back to the ``headlines`` table.

    Rows are matched on their ``headline`` text. The batch is committed when it
    succeeds and rolled back when it fails: without the rollback the failed
    transaction stays aborted and every later statement on the connection
    would be rejected.

    Args:
        rows: DataFrame with ``sentiment`` and ``headline`` columns.
        connection: Optional DB-API connection. Defaults to the notebook-level
            ``conn``, so existing single-argument calls keep working.

    Returns:
        True if the batch was written and committed, False if an error
        occurred (the error is printed).
    """
    if connection is None:
        connection = conn

    try:
        # Plain (sentiment, headline) tuples in the order the SQL placeholders expect.
        data = list(rows[['sentiment', 'headline']].itertuples(index=False, name=None))
        if not data:
            return True  # nothing to write

        with connection.cursor() as curr:
            curr.executemany("UPDATE headlines SET sentiment = %s WHERE headline = %s", data)
        connection.commit()
        return True
    except Exception as e:
        print(f"Error updating headline: {e}")
        try:
            # Leave the connection usable for the next batch.
            connection.rollback()
        except Exception as rollback_error:
            print(f"Error rolling back: {rollback_error}")
        return False

In [None]:
# Send the updates in chunks instead of building a one-row DataFrame per headline.
UPDATE_CHUNK_SIZE = 500
failed_rows = 0

with tqdm(total=len(results), desc="Updating headlines", unit="rows") as pbar:
    for start in range(0, len(results), UPDATE_CHUNK_SIZE):
        chunk = results.iloc[start:start + UPDATE_CHUNK_SIZE]
        # update_headlines reports failure through its return value; count it
        # rather than silently dropping it.
        if not update_headlines(chunk):
            failed_rows += len(chunk)
        pbar.update(len(chunk))

conn.commit()

if failed_rows:
    print(f"{failed_rows} rows were in chunks that failed to update.")

## Aggregating sentiments for stocks across multiple time periods

In [41]:
# NOTE(review): this re-defines `extract_headlines`, which is already defined in the
# "Extracting headlines" section; consider keeping a single definition.
def extract_headlines(connection=None):
    """Load every row of the ``headlines`` table into a DataFrame.

    Args:
        connection: Optional DB-API connection to query. Defaults to the
            notebook-level ``conn``, so existing zero-argument calls keep working.

    Returns:
        pandas.DataFrame with one row per fetched record; column names are
        taken from the cursor description.
    """
    if connection is None:
        connection = conn

    with connection.cursor() as curr:
        curr.execute("SELECT * FROM headlines")
        rows = curr.fetchall()
        # The first field of each description entry is the column name.
        columns = [column[0] for column in curr.description]

    return pd.DataFrame(rows, columns=columns)

In [48]:
# Re-read the table so the aggregation sees the sentiments written above.
df = extract_headlines()
grouped = df.groupby(['ticker', 'is_daily', 'is_weekly', 'is_monthly'])

# ticker -> {(is_daily, is_weekly, is_monthly): most frequent sentiment in that group}
aggregated_sentiments = {}
for (ticker, is_daily, is_weekly, is_monthly), group_rows in grouped:
    sentiment_counts = group_rows['sentiment'].value_counts()
    # value_counts() drops nulls, so a group with no sentiment yet is empty and
    # idxmax() would raise; skip it instead.
    if sentiment_counts.empty:
        continue
    predominant_sentiment = sentiment_counts.idxmax()
    aggregated_sentiments.setdefault(ticker, {})[(is_daily, is_weekly, is_monthly)] = predominant_sentiment

In [70]:
def aggregate_sentiments_by_company(headlines):
    """Index headline sentiments by ticker and period flags.

    Args:
        headlines: DataFrame with ``ticker``, ``is_daily``, ``is_weekly``,
            ``is_monthly`` and ``sentiment`` columns.

    Returns:
        dict mapping ticker -> {(is_daily, is_weekly, is_monthly): sentiment}.
        When several rows share a ticker and flag combination, the last row
        in iteration order wins.
    """
    by_ticker = {}

    for _, record in headlines.iterrows():
        symbol = record['ticker']
        window = (record['is_daily'], record['is_weekly'], record['is_monthly'])
        # Later rows overwrite earlier ones for the same (ticker, window) pair.
        by_ticker.setdefault(symbol, {})[window] = record['sentiment']

    return by_ticker

In [71]:
def _first_available(*candidates):
    """Return the first candidate that is not None, or None if all are."""
    return next((value for value in candidates if value is not None), None)


def update_stocks_table(aggregated_sentiments, connection=None):
    """Write daily/weekly/monthly sentiments for every ticker to ``stocks``.

    For each ticker the sentiments stored under the flag combinations
    (True, True, True), (False, True, True) and (False, False, True) are used
    as the daily, weekly and monthly values. A missing value falls back to the
    other periods; if none is present, NULL is written.

    All updates run in one transaction: it is committed at the end, and rolled
    back (with the error re-raised) if any statement fails, so the connection
    is not left in an aborted transaction.

    Args:
        aggregated_sentiments: dict mapping ticker ->
            {(is_daily, is_weekly, is_monthly): sentiment}.
        connection: Optional DB-API connection. Defaults to the notebook-level
            ``conn``, so existing single-argument calls keep working.
    """
    if connection is None:
        connection = conn

    try:
        with connection.cursor() as cursor:
            for ticker, sentiments in aggregated_sentiments.items():
                daily = sentiments.get((True, True, True))
                weekly = sentiments.get((False, True, True))
                monthly = sentiments.get((False, False, True))

                # Fallback order per column: own value first, then the other periods.
                cursor.execute(
                    "UPDATE stocks SET daily_sentiment = %s, weekly_sentiment = %s, monthly_sentiment = %s WHERE ticker = %s",
                    (
                        _first_available(daily, weekly, monthly),
                        _first_available(weekly, monthly, daily),
                        _first_available(monthly, weekly, daily),
                        ticker,
                    ),
                )
                print(f"Updated sentiments for Ticker: {ticker}")
    except Exception:
        connection.rollback()
        raise

    connection.commit()
    print("Sentiments updated successfully.")

In [72]:
# Persist the per-ticker sentiments held in `aggregated_sentiments` to the stocks table.
update_stocks_table(aggregated_sentiments)

Updated sentiments for Ticker: A
Updated sentiments for Ticker: AAL
Updated sentiments for Ticker: AAPL
Updated sentiments for Ticker: ABBV
Updated sentiments for Ticker: ABNB
Updated sentiments for Ticker: ABT
Updated sentiments for Ticker: ACGL
Updated sentiments for Ticker: ACN
Updated sentiments for Ticker: ADBE
Updated sentiments for Ticker: ADI
Updated sentiments for Ticker: ADM
Updated sentiments for Ticker: ADP
Updated sentiments for Ticker: ADSK
Updated sentiments for Ticker: AEE
Updated sentiments for Ticker: AEP
Updated sentiments for Ticker: AES
Updated sentiments for Ticker: AFL
Updated sentiments for Ticker: AIG
Updated sentiments for Ticker: AIZ
Updated sentiments for Ticker: AJG
Updated sentiments for Ticker: AKAM
Updated sentiments for Ticker: ALB
Updated sentiments for Ticker: ALGN
Updated sentiments for Ticker: ALL
Updated sentiments for Ticker: ALLE
Updated sentiments for Ticker: AMAT
Updated sentiments for Ticker: AMCR
Updated sentiments for Ticker: AMD
Updated sen