In [None]:
import asyncio
import logging
import os
import re
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import pandas_datareader.data as web
import requests
import seaborn as sns
import yfinance as yf
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from summarizer import Summarizer
from tqdm import tqdm
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import logging, pipeline

# Download Data

In [None]:
# Load the raw partner-headlines dataset from the working directory.
df = pd.read_csv('raw_partner_headlines.csv')

In [None]:
# Preview the first rows of the raw frame.
df.head()

# Data Description

In [None]:
# General info: column names, dtypes and non-null counts of the raw frame
df.info()

In [None]:
# Publishers: number of headlines per value of the 'publisher' column
df.groupby('publisher').size()

In [None]:
# Companies: number of headlines per ticker in the 'stock' column
df.groupby('stock').size()

# Filtering Data

In [None]:
# Fetch the S&P 500 constituents
def get_sp500_symbols():
    """Return the values of the 'Symbol' column of the first table on Wikipedia's S&P 500 page.

    Every call downloads and parses the page through ``pd.read_html``.
    """
    first_table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
    return first_table['Symbol'].tolist()

sp500_symbols = get_sp500_symbols()

In [None]:
# Filtering Data: keep headlines about S&P 500 constituents, then rank tickers by
# headline count and keep the ten most-covered ones.
filtered_df = df[df['stock'].isin(sp500_symbols)]
filtered_df_10 = filtered_df.groupby('stock').size().sort_values(ascending=False)[:10]
# Fixed: the original printed the undefined name `filtered_df_50`, which raised NameError.
print(filtered_df_10)

In [None]:
# Number of filtered news: total headlines across the ten selected tickers
filtered_df_10.sum()

In [None]:
# The index of the per-ticker counts holds the ticker symbols themselves.
stock_symbols_list = filtered_df_10.index.tolist()
print(stock_symbols_list)

In [None]:
# Restrict the raw frame to the ten most-covered tickers and renumber the rows.
filtered_df = df[df['stock'].isin(stock_symbols_list)]
filtered_df = filtered_df.reset_index(drop=True)
# Fixed: 'url' is no longer dropped here because the web-scraping section further down
# reads filtered_df['url']; dropping it made that cell fail with a KeyError.
filtered_df = filtered_df.drop(['Unnamed: 0', 'publisher'], axis=1)
# Reduce timestamps to plain calendar dates so they can be matched against daily price bars.
filtered_df['date'] = pd.to_datetime(filtered_df['date'])
filtered_df['date'] = filtered_df['date'].dt.date
filtered_df.head()

In [None]:
# Date window covered by the filtered headlines, formatted as YYYY-MM-DD strings
start_date = filtered_df['date'].min().strftime('%Y-%m-%d')
end_date = filtered_df['date'].max().strftime('%Y-%m-%d')
print(end_date, start_date, sep='\n')

In [None]:
# Daily SPY price history over the headline date window (used as the S&P benchmark)
spy = yf.Ticker("SPY")
hist_spy = spy.history(end=end_date, start=start_date)

# Open-to-close move of each session in percent, rounded to two decimals
spy_open, spy_close = hist_spy['Open'], hist_spy['Close']
hist_spy['Daily Change'] = ((spy_close - spy_open) / spy_open * 100).round(2)
hist_spy.head()

In [None]:
# One merged frame per ticker: its headlines joined with that day's move relative to SPY
dfs = []

for symbol in stock_symbols_list:
    ticker = yf.Ticker(symbol)
    hist = ticker.history(start=start_date, end=end_date)
    hist['Daily Change'] = ((hist['Close'] - hist['Open']) / hist['Open'] * 100).round(2)

    # Ticker move minus benchmark move; the subtraction aligns both series on their index
    daily_change_diff = hist['Daily Change'] - hist_spy['Daily Change']
    df2 = daily_change_diff.to_frame().reset_index()
    # Plain dates, so the join key has the same type as filtered_df['date']
    df2['Date'] = pd.to_datetime(df2['Date']).dt.date

    df_filtered = filtered_df[filtered_df['stock'] == symbol]
    merged_df = (
        pd.merge(df_filtered, df2, left_on='date', right_on='Date', how='left')
        .drop('Date', axis=1)
    )
    dfs.append(merged_df)

# Stack all tickers and discard headlines that found no matching price row
final_df = (
    pd.concat(dfs, ignore_index=True)
    .dropna(subset=['Daily Change'])
    .reset_index(drop=True)
)

final_df.head()

In [None]:
def categorize_change(value, threshold):
    """Map a numeric change to 'positive', 'negative' or 'neutral'.

    The boundaries are asymmetric: a value must be strictly greater than
    ``threshold`` to be 'positive', while a value equal to ``-threshold``
    already counts as 'negative'. Everything else (including NaN, for which
    both comparisons are False) is 'neutral'.
    """
    if value > threshold:
        return 'positive'
    return 'negative' if value <= -threshold else 'neutral'

In [None]:
# Relative moves beyond +/- 0.4 (in the units of 'Daily Change') are labelled positive/negative
threshold = 0.4
final_df['Change Category'] = final_df['Daily Change'].apply(categorize_change, args=(threshold,))

In [None]:
# Inspect the first 15 labelled rows.
final_df.head(15)

In [None]:
# Number of headlines in each change category.
category_counts = final_df.groupby('Change Category').size()

In [None]:
# Bar chart of the label distribution (one colour per category, in index order)
ax = category_counts.plot(kind='bar', color=['red', 'blue', 'green'])

ax.set_title('Counts by Change Category')
ax.set_xlabel('Change Category')
ax.set_ylabel('Counts')
plt.xticks(rotation=0)
plt.show()

In [None]:
# From here on `df` refers to a copy of the labelled frame, no longer the raw CSV.
df = final_df.copy()

In [None]:
# Preview the labelled frame.
df.head()

In [None]:
# Number of labelled headlines.
len(df)

## Preparing Data

In [None]:
# Build the model inputs: 'text' is a copy of the headline and 'label' a copy of the
# string change category ('text' and 'labels' are the expected input columns).
df['text'] = df['headline']
df['label'] = df['Change Category']

# Integer-encode the string categories
le = LabelEncoder()
df['labels'] = le.fit_transform(df['label'])

# Seeded 80/20 train/test split over the two model columns
model_columns = ['text', 'labels']
train_df, test_df = train_test_split(df[model_columns], test_size=0.2, random_state=42)

# Keep only the necessary columns in the train and test sets
train_df = train_df[model_columns]
test_df = test_df[model_columns]

## BERT

In [None]:
# Training configuration: 5 epochs, multiprocessing disabled, outputs written to 'output' (overwritten)
model_args = ClassificationArgs(
    output_dir='output',
    overwrite_output_dir=True,
    num_train_epochs=5,
    use_multiprocessing_for_evaluation=False,
    use_multiprocessing=False,
)

# BERT classifier ("bert-base-uncased" checkpoint) with one class per encoded label; CUDA is requested
model = ClassificationModel(
    "bert",
    "bert-base-uncased",
    args=model_args,
    num_labels=len(le.classes_),
    use_cuda=True,
)

train_loss_list = []

# Fine-tune on the training split
model.train_model(train_df)

In [None]:
# Predict on the test headlines with the BERT model trained in the previous cell
predictions, raw_outputs = model.predict(test_df['text'].tolist())

# Accuracy of the predictions against the encoded test labels
accuracy = accuracy_score(test_df['labels'], predictions)
print(f"Accuracy: {accuracy}")

In [None]:
# Ground truth and model predictions for the held-out split
actual_labels, predicted_labels = test_df['labels'], predictions

# Per-class precision, recall, F1-score and support, named with the original category strings
report = classification_report(actual_labels, predicted_labels, target_names=le.classes_)
# Raw confusion matrix and the same matrix labelled with the category names
cm = confusion_matrix(actual_labels, predicted_labels)
cm_df = pd.DataFrame(cm, columns=le.classes_, index=le.classes_)

print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)
print("Confusion Matrix with Labels:\n", cm_df)

## Roberta

In [None]:
# Training configuration: 5 epochs, multiprocessing disabled, outputs written to 'output' (overwritten)
model_args = ClassificationArgs(
    output_dir='output',
    overwrite_output_dir=True,
    num_train_epochs=5,
    use_multiprocessing_for_evaluation=False,
    use_multiprocessing=False,
)

# RoBERTa classifier ("roberta-base" checkpoint) with one class per encoded label; CUDA is requested
model = ClassificationModel(
    "roberta",
    "roberta-base",
    args=model_args,
    num_labels=len(le.classes_),
    use_cuda=True,
)

# Fine-tune on the training split
model.train_model(train_df)

In [None]:
# Predict on the test headlines with the RoBERTa model trained in the previous cell
predictions, raw_outputs = model.predict(test_df['text'].tolist())

# Accuracy of the predictions against the encoded test labels
accuracy = accuracy_score(test_df['labels'], predictions)
print(f"Accuracy: {accuracy}")

In [None]:
# Ground truth and model predictions for the held-out split
actual_labels, predicted_labels = test_df['labels'], predictions

# Per-class precision, recall, F1-score and support, named with the original category strings
report = classification_report(actual_labels, predicted_labels, target_names=le.classes_)
# Raw confusion matrix and the same matrix labelled with the category names
cm = confusion_matrix(actual_labels, predicted_labels)
cm_df = pd.DataFrame(cm, columns=le.classes_, index=le.classes_)

print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)
print("Confusion Matrix with Labels:\n", cm_df)

## Electra

In [None]:
# Training configuration: 5 epochs, multiprocessing disabled, outputs written to 'output' (overwritten)
model_args = ClassificationArgs(
    output_dir='output',
    overwrite_output_dir=True,
    num_train_epochs=5,
    use_multiprocessing_for_evaluation=False,
    use_multiprocessing=False,
)

# ELECTRA classifier ("google/electra-small-discriminator" checkpoint), one class per encoded label
model = ClassificationModel(
    "electra",
    "google/electra-small-discriminator",
    args=model_args,
    num_labels=len(le.classes_),
    use_cuda=True,
)

# Fine-tune on the training split
model.train_model(train_df)

In [None]:
# Predict on the test headlines with the ELECTRA model trained in the previous cell
predictions, raw_outputs = model.predict(test_df['text'].tolist())

# Accuracy of the predictions against the encoded test labels
accuracy = accuracy_score(test_df['labels'], predictions)
print(f"Accuracy: {accuracy}")

In [None]:
# Ground truth and model predictions for the held-out split
actual_labels, predicted_labels = test_df['labels'], predictions

# Per-class precision, recall, F1-score and support, named with the original category strings
report = classification_report(actual_labels, predicted_labels, target_names=le.classes_)
# Raw confusion matrix and, for readability, the same matrix labelled with the category names
cm = confusion_matrix(actual_labels, predicted_labels)
cm_df = pd.DataFrame(cm, columns=le.classes_, index=le.classes_)

print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)
print("Confusion Matrix with Labels:\n", cm_df)

# Flan-t5-large Model

In [None]:
# Load the "google/flan-t5-large" seq2seq checkpoint and its tokenizer, then move the model to the GPU.
# Note: `model` now replaces the classifier object used in the previous sections.
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
# NOTE(review): max_new_tokens is normally an argument of model.generate(); passed to the
# tokenizer it presumably does not limit generation length — confirm and move it if intended.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large", max_new_tokens = 500)
model.cuda()

In [None]:
def make_prompt(instance, examples, zero_shot = False):
    """Build the classification prompt for one piece of text.

    Args:
        instance: the text to classify.
        examples: mapping of label -> example text; only read in few-shot mode.
        zero_shot: when truthy, no examples are included and ``instance`` is
            inserted unquoted; otherwise every example is listed after the task
            description and ``instance`` is wrapped in double quotes.

    Returns:
        The full prompt string.
    """
    prompt = 'In this task you have to determine if a piece of text might lead to a positive or negative change in the price of a stock mentioned in the headline. If you are unsure whether a headline would lead to a change in the stock price, err on the side of caution and label it as neutral.'

    prompt_end = 'With these instructions in mind and a piece of text, please reply with either of the three options and nothing else: 1) positive, 2) negative, 3) neutral. How this headline might be labelled based on the past guidelines: '

    if not zero_shot:
        # Few-shot: one 'Example for "<label>": <text>' fragment per entry, space-separated
        examples_str = ' '.join(
            f'Example for "{label}": {example}' for label, example in examples.items()
        )
        return f'''{prompt} {examples_str} \n \n {prompt_end} "{instance}" ?'''

    return f'''{prompt} \n \n {prompt_end} {instance} ?'''

In [None]:
# One example headline per label, inserted into the few-shot prompt as 'Example for "<label>": ...'.
# Fixed: the first key was misspelled 'potitive', so the prompt showed a label name that is
# not one of the three answers the model is asked to choose from.
headlines_examples = {'positive': 'SpartanNash (SPTN) Q1 Earnings Surpass Estimates, Sales Up',
           'negative': 'Abercrombie (ANF) Q1 Loss Wider Than Expected, Sales Fall Y/Y',
           'neutral': 'Kroger Achieves New Zero Hunger | Zero Waste Milestones'}

In [None]:
def model_labelling_headlines(data, use_few_shot):
    """Generate one model answer per row of ``data``.

    Each row's 'headline' is cut to its first 512 characters, turned into a
    prompt by ``make_prompt`` (few-shot when ``use_few_shot`` is truthy,
    zero-shot otherwise), tokenized, moved to "cuda:0" and passed to the
    module-level ``model``. The decoded answers are returned as a list in
    row order.
    """
    all_responses = []

    for _, row in tqdm(data.iterrows(), total=len(data)):
        prompt = make_prompt(
            row['headline'][:512],
            examples=headlines_examples,
            zero_shot=not use_few_shot,
        )
        encoded = tokenizer(prompt, return_tensors="pt").to("cuda:0")
        generated = model.generate(**encoded)
        decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
        all_responses.append(decoded[0])

    return all_responses

In [None]:
# Label every row of final_df twice with the Flan-T5 model: without and with in-prompt examples.
all_responses_zero_shot = model_labelling_headlines(final_df, use_few_shot=False)
all_responses_few_shot = model_labelling_headlines(final_df, use_few_shot=True)

In [None]:
# Collect headline, date, true label and both sets of generated answers side by side
results = pd.DataFrame({
    'headline': final_df['headline'],
    'date': final_df['date'],
    'label': final_df['Change Category'],
    'zero_shot': all_responses_zero_shot,
    'few_shot': all_responses_few_shot,
})

## Evaluation of the results Flan

In [None]:
# Rows on which the zero-shot and few-shot answers disagree
different_values = results['zero_shot'].ne(results['few_shot']).sum()
print("Number of different values between 'zero_shot' and 'few_shot':", different_values)

In [None]:
# Flag, per row, whether each answer equals the true label exactly
results['zero_shot_correct'] = results['label'].eq(results['zero_shot'])
results['few_shot_correct'] = results['label'].eq(results['few_shot'])

# Share of exact matches, in percent
zero_shot_accuracy = 100 * results['zero_shot_correct'].mean()
few_shot_accuracy = 100 * results['few_shot_correct'].mean()

# Print the accuracies
print("Zero Shot Accuracy: {:.2f}%".format(zero_shot_accuracy))
print("Few Shot Accuracy: {:.2f}%".format(few_shot_accuracy))

In [None]:
def compute_macro_f1(df, column_name):
    """Return the macro-averaged F1 of ``df[column_name]`` against ``df['label']``, rounded to 3 decimals."""
    score = f1_score(df['label'], df[column_name], average='macro')
    return round(score, 3)

In [None]:
# Macro F1 of the Flan-T5 answers against the true labels, for both prompting modes.
zero_shot_f1 = compute_macro_f1(results, 'zero_shot')
few_shot_f1 = compute_macro_f1(results, 'few_shot')

print("Zero Shot F1 Score: {:.3f}".format(zero_shot_f1))
print("Few Shot F1 Score: {:.3f}".format(few_shot_f1))

# Chat-GPT-3.5 turbo Model

In [None]:
# Endpoint and credentials for the OpenAI-compatible API.
# Fixed: the API key used to be hard-coded in this cell. It is now read from the
# OPENAI_API_KEY environment variable (a missing variable raises KeyError), so the secret
# is not stored in the notebook. The previously committed key should be considered
# leaked and revoked.
openai.api_base = os.environ.get("OPENAI_API_BASE", "http://91.107.239.71:80")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
def chatgpt_labelling_headlines(data, use_few_shot):
    """Ask the chat model to label every headline in ``data``.

    Args:
        data: frame with 'headline' and 'Change Category' columns.
        use_few_shot: when truthy the prompt contains the examples from
            ``headlines_examples``; otherwise a zero-shot prompt is used.

    Returns:
        DataFrame with columns 'headline', 'label' (the row's 'Change Category')
        and 'gpt_response' (the raw reply text), one row per input row. An empty
        ``data`` yields an empty frame with these columns.
    """
    all_responses = []

    for _, row in tqdm(data.iterrows(), total=len(data)):
        # Only the first 512 characters of the headline go into the prompt
        instance = row['headline'][:512]
        prompt = make_prompt(instance, examples=headlines_examples, zero_shot=not use_few_shot)
        responses = openai.ChatCompletion.create(model="gpt-3.5-turbo-0125",
                                                 messages=[{"role": "user", "content": prompt}],
                                                 max_tokens=2,
                                                 n=1)

        all_responses.append([
            row['headline'],
            row['Change Category'],
            responses['choices'][0]['message']['content'],
        ])

    # Fixed: the frame used to be rebuilt inside the loop on every iteration (quadratic work)
    # and `result` was undefined when `data` had no rows; it is now built once.
    return pd.DataFrame(all_responses, columns=['headline', 'label', 'gpt_response'])

In [None]:
# Zero-shot GPT labels for every row of df; this rebinds the name previously holding the Flan-T5 answers.
all_responses_zero_shot = chatgpt_labelling_headlines(df, use_few_shot=False)

In [None]:
# Few-shot GPT labels for every row of df; this rebinds the name previously holding the Flan-T5 answers.
all_responses_few_shot = chatgpt_labelling_headlines(df, use_few_shot=True)

In [None]:
# define replacements: canonical label -> list of raw reply strings that are rewritten to it
# (matching is done on the whole cell value by Series.replace in the next cell)
replacements = {
    'positive': ['Positive', 'positive', 'Positive.', '1)', ' positive'],
    'negative': ['Negative', 'negative', '2)', 'Negative.', '\n\nNegative'],
    'neutral': ['Neutral', 'neutral', 'Neutral.', '3)', ' neutral', 'The headline', 'Neutral ', 'This headline', 'neutral ', '- neutral']}

In [None]:
# Fixed: `chat` was used without ever being defined. The cells below evaluate the
# zero-shot GPT run, so `chat` is a copy of those responses (the copy leaves the raw
# replies in all_responses_zero_shot untouched).
chat = all_responses_zero_shot.copy()

# Normalise the raw replies to the three canonical labels
for key, values in replacements.items():
    chat['gpt_response'] = chat['gpt_response'].replace(values, key)

In [None]:
# confusion matrix for zero_shot
# Fixed: the two assignments were swapped (the true 'label' column was called predicted and
# the model reply actual), which transposed the matrix relative to its axis labels.
actual_labels = chat['label']
predicted_labels = chat['gpt_response']

# Get the unique labels (union of both columns, so unexpected replies get their own row/column)
unique_labels = sorted(set(actual_labels) | set(predicted_labels))

# Compute confusion matrix: rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('Actual Labels')
plt.title('Confusion Matrix zero shot ChatGPT')
plt.show()

In [None]:
def compute_scores(df, column_name):
    """Score ``df[column_name]`` against ``df['label']``.

    Returns a dict with 'accuracy' and 'macro_avg_f1', each rounded to 3 decimals.
    """
    truth, predicted = df['label'], df[column_name]
    return {
        'accuracy': round(accuracy_score(truth, predicted), 3),
        'macro_avg_f1': round(f1_score(truth, predicted, average='macro'), 3),
    }

In [None]:
# Accuracy and macro F1 of the normalised GPT replies.
zero_shot_gpt = compute_scores(chat, 'gpt_response')

In [None]:
# Display the score dictionary.
zero_shot_gpt

In [None]:
# Number of rows per distinct (normalised) GPT reply; rebinds category_counts from the earlier section.
category_counts  = chat.groupby('gpt_response').size()
category_counts

In [None]:
# Bar chart of how often each label was predicted
ax = category_counts.plot(kind='bar', color=['royalblue', 'midnightblue', 'slategrey'])

ax.set_title('Predicted Categories: zero-shot mode GPT')
ax.set_xlabel('Label')
ax.set_ylabel('Counts')
plt.xticks(rotation=0)
plt.show()

# WEB scraping

In [None]:
# extract website name
def get_website_name(url):
    """Return the first host label of ``url`` (after an optional scheme and 'www.').

    For example 'https://www.example.com/page' gives 'example'. Returns "Unknown"
    when the pattern does not match or when ``url`` is not a string (e.g. a
    missing value), instead of raising TypeError.
    """
    if not isinstance(url, str):
        return "Unknown"
    match = re.search(r"(?:https?://)?(?:www\.)?([a-zA-Z0-9-]+)\.", url)
    return match.group(1) if match else "Unknown"

# Derive the source site for every row.
# NOTE(review): this reads filtered_df['url'], but the earlier filtering cell drops the
# 'url' column from filtered_df — confirm the column is still present at this point.
filtered_df['website_name'] = filtered_df['url'].apply(get_website_name)

In [None]:
# Check the number of articles for each unique source
filtered_df['website_name'].value_counts()

In [None]:
async def fetch_html(session, url, headers):
    """GET ``url`` through ``session`` with the given headers and return the response body as text."""
    request = session.get(url, headers=headers)
    async with request as response:
        body = await response.text()
        return body

async def parse_html(html, attribute):
    """Return the stripped text of the first ``<div>`` whose class is ``attribute``.

    Returns None when ``html`` is not a non-empty string or when no such div
    exists. The previous blanket ``except Exception`` also turned every other
    failure into None — including a NameError when BeautifulSoup has not been
    imported — which silently produced an all-None result; such errors now
    propagate. Requires ``BeautifulSoup`` (bs4) to be imported in the notebook.
    """
    if not isinstance(html, str) or not html:
        return None
    soup = BeautifulSoup(html, 'html.parser')
    content = soup.find('div', class_ = attribute)
    if content is None:
        # Page has no div with the requested class
        return None
    return content.get_text(strip=True)

async def parse_asynchronously(urls, attribute):
    """Download all ``urls`` concurrently and extract the ``<div class=attribute>`` text of each.

    Returns a list aligned with ``urls``. An entry is None when the page could
    not be downloaded or parse_html returned None. Previously a single failed
    request raised out of ``asyncio.gather`` and discarded the whole batch.
    Requires ``aiohttp`` to be imported in the notebook.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15"
    }

    async with aiohttp.ClientSession() as session:
        tasks = [fetch_html(session, url, headers) for url in urls]
        # return_exceptions=True hands failures back as exception objects in place of the
        # HTML, so the successful downloads are kept.
        html_responses = await asyncio.gather(*tasks, return_exceptions=True)

    async def _parse_or_none(html):
        # A failed download is reported as None, like an unparsable page
        if isinstance(html, BaseException):
            return None
        return await parse_html(html, attribute)

    parsed_texts = await asyncio.gather(*(_parse_or_none(html) for html in html_responses))
    return parsed_texts

async def main(df, attribute):
    """Fetch and parse every URL in ``df['url']``; return the parsed texts as a new list in row order."""
    urls = df['url'].tolist()
    parsed_texts = await parse_asynchronously(urls, attribute)
    return list(parsed_texts)

In [None]:
# Select the rows whose source site is 'marketfy' (as an independent copy)
marketfy = filtered_df[filtered_df['website_name'] == 'marketfy'].copy()

# Top-level await: scrape the text of the div with class "user-content" from each URL
marketfy_texts = await main(marketfy, "user-content")

In [None]:
# Attach the scraped texts as an 'article_body' column (None where nothing was extracted)
marketfy['article_body'] = marketfy_texts

# Save the result to marketfy.csv in the working directory
marketfy.to_csv('marketfy.csv')

# Check the first rows
marketfy.head()

In [None]:
# Select the rows whose source site is 'foxbusiness' (as an independent copy)
foxbusiness = filtered_df[filtered_df['website_name'] == 'foxbusiness'].copy()

# Top-level await: scrape the text of the div with class "article-content" from each URL
foxbusiness_texts = await main(foxbusiness, "article-content")

In [None]:
# Attach the scraped texts as an 'article_body' column (None where nothing was extracted)
foxbusiness['article_body'] = foxbusiness_texts

# Save the result to foxbusiness.csv in the working directory
foxbusiness.to_csv('foxbusiness.csv')

# Check the first rows
foxbusiness.head()

In [None]:
# ...

# Summarization

## Abstractive summarization

In [None]:
# Instantiate the summarization pipeline; the same checkpoint name is used for model and
# tokenizer, with the PyTorch framework, truncation enabled and device index 0.
model_name = "sshleifer/distilbart-cnn-12-6"
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name, 
                      framework="pt", truncation=True, device = 0)

In [None]:
# Get predicted summaries 

def getSummaryAbstr(summarizer, article):
    """Summarise ``article`` with ``summarizer`` (called with ``min_length=60``).

    Returns the "summary_text" field of the first element of the summarizer's output.
    """
    return summarizer(article, min_length = 60)[0]["summary_text"]

In [None]:
# Load the scraped article bodies from the Kaggle input dataset (absolute, environment-specific path).
full_text = pd.read_csv('/kaggle/input/article-full/fulltext_df.csv')

In [None]:
# Fixed: `progress_apply` only exists after tqdm has registered itself with pandas.
tqdm.pandas()

article = full_text['article_body'].astype(str).tolist()
# Fixed: summarise the string-cast bodies (as already prepared for `article` above), so a
# missing value reaches the summarizer as text rather than as a float NaN.
full_text["summary"] = full_text['article_body'].astype(str).progress_apply(
    lambda row: getSummaryAbstr(summarizer, row))

In [None]:
# Persist the articles together with their abstractive summaries.
full_text.to_csv('full_text_abstractive.csv')

## Extractive summarization

In [None]:
# Load the extractive summarizer with the "distilbert-base-uncased" model
# (this rebinds `summarizer`, replacing the abstractive pipeline defined above)
summarizer = Summarizer(model="distilbert-base-uncased")

In [None]:
def getSummary(summarizer, article):
    """Summarise ``article`` with ``summarizer`` (called with ``min_length=20``).

    The summarizer's output is concatenated with no separator and returned as one string.
    """
    pieces = summarizer(article, min_length = 20)
    return "".join(pieces)

In [None]:
# Load the scraped article bodies from the Kaggle input dataset (absolute, environment-specific path).
fulltext = pd.read_csv('/kaggle/input/fulltext/fulltext_df.csv')
fulltext.head()

In [None]:
# Fixed: `progress_apply` only exists after tqdm has registered itself with pandas
# (calling this again is harmless if it was already done).
tqdm.pandas()

# Extractive summary of every article body, with a progress bar
fulltext["summary"] = fulltext["article_body"].progress_apply(
    lambda row: getSummary(summarizer, row))

In [None]:
# Persist the articles together with their extractive summaries.
fulltext.to_csv('full_text_extractive.csv')

# Abstractive Summarization ELECTRA

In [None]:
# Load abs_lab.csv; the next cell reads its 'summary' and 'Change Category' columns.
abs_lab = pd.read_csv('abs_lab.csv')

In [None]:
# Build the model inputs: 'text' is a copy of the summary and 'label' a copy of the
# string change category ('text' and 'labels' are the expected input columns).
abs_lab['text'] = abs_lab['summary']
abs_lab['label'] = abs_lab['Change Category']

# Integer-encode the string categories (rebinds `le` for this dataset)
le = LabelEncoder()
abs_lab['labels'] = le.fit_transform(abs_lab['label'])

# Seeded 80/20 train/test split over the two model columns
model_columns = ['text', 'labels']
train_df, test_df = train_test_split(abs_lab[model_columns], test_size=0.2, random_state=42)

# Keep only the necessary columns in the train and test sets
train_df = train_df[model_columns]
test_df = test_df[model_columns]

In [None]:
# Training configuration: 3 epochs, multiprocessing disabled, outputs written to 'output' (overwritten)
model_args = ClassificationArgs(
    output_dir='output',
    overwrite_output_dir=True,
    num_train_epochs=3,
    use_multiprocessing_for_evaluation=False,
    use_multiprocessing=False,
)

# ELECTRA classifier ("google/electra-small-discriminator" checkpoint), one class per encoded label
model = ClassificationModel(
    "electra",
    "google/electra-small-discriminator",
    args=model_args,
    num_labels=len(le.classes_),
    use_cuda=True,
)

# Fine-tune on the abstractive-summary training split
model.train_model(train_df)

In [None]:
# Predict on the test summaries with the ELECTRA model trained in the previous cell
predictions, raw_outputs = model.predict(test_df['text'].tolist())

# Accuracy of the predictions against the encoded test labels
accuracy = accuracy_score(test_df['labels'], predictions)
print(f"Accuracy: {accuracy}")

In [None]:
# Ground truth and model predictions for the held-out split
actual_labels, predicted_labels = test_df['labels'], predictions

# Per-class precision, recall, F1-score and support, named with the original category strings
report = classification_report(actual_labels, predicted_labels, target_names=le.classes_)
# Raw confusion matrix and the same matrix labelled with the category names
cm = confusion_matrix(actual_labels, predicted_labels)
cm_df = pd.DataFrame(cm, columns=le.classes_, index=le.classes_)

print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)
print("Confusion Matrix with Labels:\n", cm_df)

# Extractive Summarization ELECTRA

In [None]:
# Load ext_lab.csv; the following cells read its 'summary' and 'Change Category' columns.
ext_lab = pd.read_csv('ext_lab.csv')

In [None]:
# Replace missing summaries with the placeholder string '0' so that every row has text.
ext_lab['summary'] = ext_lab['summary'].fillna('0')

In [None]:
# Build the model inputs: 'text' is a copy of the summary and 'label' a copy of the
# string change category ('text' and 'labels' are the expected input columns).
ext_lab['text'] = ext_lab['summary']
ext_lab['label'] = ext_lab['Change Category']

# Integer-encode the string categories (rebinds `le` for this dataset)
le = LabelEncoder()
ext_lab['labels'] = le.fit_transform(ext_lab['label'])

# Seeded 80/20 train/test split over the two model columns
model_columns = ['text', 'labels']
train_df, test_df = train_test_split(ext_lab[model_columns], test_size=0.2, random_state=42)

# Keep only the necessary columns in the train and test sets
train_df = train_df[model_columns]
test_df = test_df[model_columns]

In [None]:
# Training configuration: 3 epochs, multiprocessing disabled, outputs written to 'output' (overwritten)
model_args = ClassificationArgs(
    output_dir='output',
    overwrite_output_dir=True,
    num_train_epochs=3,
    use_multiprocessing_for_evaluation=False,
    use_multiprocessing=False,
)

# ELECTRA classifier ("google/electra-small-discriminator" checkpoint), one class per encoded label
model = ClassificationModel(
    "electra",
    "google/electra-small-discriminator",
    args=model_args,
    num_labels=len(le.classes_),
    use_cuda=True,
)

# Fine-tune on the extractive-summary training split
model.train_model(train_df)

In [None]:
# Predict on the test summaries with the ELECTRA model trained in the previous cell
predictions, raw_outputs = model.predict(test_df['text'].tolist())

# Accuracy of the predictions against the encoded test labels
accuracy = accuracy_score(test_df['labels'], predictions)
print(f"Accuracy: {accuracy}")

In [None]:
# Ground truth and model predictions for the held-out split
actual_labels, predicted_labels = test_df['labels'], predictions

# Per-class precision, recall, F1-score and support, named with the original category strings
report = classification_report(actual_labels, predicted_labels, target_names=le.classes_)
# Raw confusion matrix and the same matrix labelled with the category names
cm = confusion_matrix(actual_labels, predicted_labels)
cm_df = pd.DataFrame(cm, columns=le.classes_, index=le.classes_)

print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)
print("Confusion Matrix with Labels:\n", cm_df)

## Abstractive summary results analysis

In [None]:
# Load abs_summary.csv; later cells read its 'zero_shot', 'few_shot', 'zero_shot_gpt',
# 'few_shot_gpt', 'Change Category' and 'date' columns.
abs_summary = pd.read_csv('abs_summary.csv')

In [None]:
# Canonical label -> raw reply strings that are rewritten to it (whole-value matches)
replacements = {
    'positive': ['Positive', 'positive', 'Positive.', '1)'],
    'negative': ['Negative', 'negative', '2)'],
    'neutral': ['Neutral', 'neutral', 'Neutral.', '3)', ' neutral', 'The headline', 'Once again', 'neutral '],
}

# Normalise both GPT answer columns
for key, values in replacements.items():
    for gpt_column in ('zero_shot_gpt', 'few_shot_gpt'):
        abs_summary[gpt_column] = abs_summary[gpt_column].replace(values, key)

### Flan T5 results for abstractive summary 

In [None]:
# number of different labels between few shot and zero shot of Flan T5 classification
different_values = (abs_summary['zero_shot'] != abs_summary['few_shot']).sum()
print("Number of different values between 'zero_shot' and 'few_shot':", different_values)

In [None]:
# Create a column for comparing true label with zero_shot and few_shot predictions of Flan T5 
abs_summary['zero_shot_correct'] = (abs_summary['Change Category'] == abs_summary['zero_shot'])
abs_summary['few_shot_correct'] = (abs_summary['Change Category'] == abs_summary['few_shot'])

# Calculate the accuracy for zero_shot and few_shot
zero_shot_accuracy = abs_summary['zero_shot_correct'].mean() * 100
few_shot_accuracy = abs_summary['few_shot_correct'].mean() * 100

# Print the accuracies
print("Zero Shot Accuracy: {:.2f}%".format(zero_shot_accuracy))
print("Few Shot Accuracy: {:.2f}%".format(few_shot_accuracy))

In [None]:
def compute_macro_f1(df, column_name):
    """Return the macro-averaged F1 score of one prediction column.

    Args:
        df: DataFrame holding the true labels in 'Change Category'.
        column_name: Name of the column in `df` that holds the predicted labels.

    Returns:
        The macro F1 score computed by sklearn's `f1_score`, rounded to 3 decimals.
    """
    score = f1_score(df['Change Category'], df[column_name], average='macro')
    return round(score, 3)

In [None]:
# Macro F1 of the Flan T5 zero-shot and few-shot predictions
zero_shot_f1, few_shot_f1 = (
    compute_macro_f1(abs_summary, prediction_column)
    for prediction_column in ('zero_shot', 'few_shot')
)

print("Zero Shot F1 Score: {:.3f}".format(zero_shot_f1))
print("Few Shot F1 Score: {:.3f}".format(few_shot_f1))

In [None]:
# Confusion matrix for the zero-shot Flan T5 predictions
predicted_labels = abs_summary['zero_shot']
actual_labels = abs_summary['Change Category']

# Use every label seen in either column so both axes share the same ordering
unique_labels = sorted(set(actual_labels) | set(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Plot the matrix as an annotated heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('Actual Labels')
# The previous title named GPT and the extractive summary, but this cell plots
# the zero-shot Flan T5 column of abs_summary
plt.title('Confusion Matrix zero shot Flan T5')
plt.show()

In [None]:
# Confusion matrix for the few-shot Flan T5 predictions
actual_labels = abs_summary['Change Category']
predicted_labels = abs_summary['few_shot']

# Shared, sorted label order for both axes
unique_labels = sorted(set(actual_labels).union(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Draw the annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('Actual Labels')
ax.set_title('Confusion Matrix few shot Flan T5')
plt.show()

In [None]:
# Is there a connection between time and correct predictions?

# Parse the date column and derive the calendar year of every row
abs_summary['date'] = pd.to_datetime(abs_summary['date'])
abs_summary['year'] = abs_summary['date'].dt.year

# Rows per year whose zero-shot Flan T5 label equals the true label
correct_predictions = (
    abs_summary[abs_summary['Change Category'] == abs_summary['zero_shot']]
    .groupby('year')
    .size()
    .reset_index(name='correct_predictions')
)

# All rows per year
total_predictions = abs_summary.groupby('year').size().reset_index(name='total_predictions')

# The left merge keeps every year. A year without a single correct prediction is
# missing from `correct_predictions` and would come out as NaN, so count it as 0.
analysis_df = pd.merge(total_predictions, correct_predictions, on='year', how='left')
analysis_df['correct_predictions'] = analysis_df['correct_predictions'].fillna(0).astype(int)

# Share of correct predictions per year
analysis_df['correct_proportion'] = analysis_df['correct_predictions'] / analysis_df['total_predictions']

In [None]:
# Rows per year whose Flan T5 label equals the true label, for each mode.
# Relies on the 'year' column already being present in abs_summary.
correct_predictions_zero_shot = abs_summary[abs_summary['Change Category'] == abs_summary['zero_shot']].groupby('year').size().reset_index(name='correct_predictions_zero_shot')
correct_predictions_few_shot = abs_summary[abs_summary['Change Category'] == abs_summary['few_shot']].groupby('year').size().reset_index(name='correct_predictions_few_shot')

# Start from the total row count per year and attach the zero-shot counts
total_predictions_zero_shot = abs_summary.groupby('year').size().reset_index(name='total_predictions')
analysis_df = pd.merge(total_predictions_zero_shot, correct_predictions_zero_shot, on='year', how='left')

# Attach the few-shot counts. `total_predictions_few_shot` is computed but not merged:
# both modes share the same per-year total stored in 'total_predictions'.
total_predictions_few_shot = abs_summary.groupby('year').size().reset_index(name='total_predictions_few_shot')
analysis_df = pd.merge(analysis_df, correct_predictions_few_shot, on='year', how='left')

# Left merges leave NaN for a year with no correct prediction; that is a count of 0
count_columns = ['correct_predictions_zero_shot', 'correct_predictions_few_shot']
analysis_df[count_columns] = analysis_df[count_columns].fillna(0).astype(int)

# Share of correct predictions per year for each mode
analysis_df['zero_shot_correct_proportion'] = analysis_df['correct_predictions_zero_shot'] / analysis_df['total_predictions']
analysis_df['few_shot_correct_proportion'] = analysis_df['correct_predictions_few_shot'] / analysis_df['total_predictions']

In [None]:
# Yearly share of correct Flan T5 predictions, one line per mode
fig, ax = plt.subplots(figsize=(10, 6))
for proportion_column, line_label in (
    ('zero_shot_correct_proportion', 'Zero Shot Flant5 Large'),
    ('few_shot_correct_proportion', 'Few Shot Flant5 Large'),
):
    ax.plot(analysis_df['year'], analysis_df[proportion_column], marker='o', linestyle='-', label=line_label)
ax.set_title('Proportion of Correct Predictions Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Accuracy')
ax.grid(True)
# One tick per year present in the data
ax.set_xticks(analysis_df['year'])
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Number of rows per distinct zero-shot Flan T5 label; the bare name on the
# last line displays the resulting Series as the cell output
category_counts  = abs_summary.groupby('zero_shot').size()
category_counts

In [None]:
# Bar chart of the label counts held in `category_counts`, with horizontal tick labels
ax = category_counts.plot.bar(color=['royalblue', 'midnightblue', 'slategrey'], rot=0)
ax.set_title('Predicted Categories: zero-shot mode Flan T5')
ax.set_xlabel('Label')
ax.set_ylabel('Counts')
plt.show()

### ChatGPT results for abstractive summary

In [None]:
# Count the rows where the few-shot and zero-shot GPT labels disagree
different_values = abs_summary['few_shot_gpt'].ne(abs_summary['zero_shot_gpt']).sum()
print("Number of different values between 'zero_shot' and 'few_shot':", different_values)

In [None]:
# Flag, per row, whether each GPT prediction equals the true label.
# These assignments overwrite the 'zero_shot_correct' / 'few_shot_correct' columns.
abs_summary['zero_shot_correct'] = abs_summary['Change Category'].eq(abs_summary['zero_shot_gpt'])
abs_summary['few_shot_correct'] = abs_summary['Change Category'].eq(abs_summary['few_shot_gpt'])

# Accuracy in percent = share of rows flagged as correct
zero_shot_accuracy = 100 * abs_summary['zero_shot_correct'].mean()
few_shot_accuracy = 100 * abs_summary['few_shot_correct'].mean()

# Report both accuracies
print("Zero Shot Accuracy: {:.2f}%".format(zero_shot_accuracy))
print("Few Shot Accuracy: {:.2f}%".format(few_shot_accuracy))

In [None]:
# Macro F1 of the GPT zero-shot and few-shot predictions
zero_shot_f1, few_shot_f1 = (
    compute_macro_f1(abs_summary, prediction_column)
    for prediction_column in ('zero_shot_gpt', 'few_shot_gpt')
)

print("Zero Shot F1 Score: {:.3f}".format(zero_shot_f1))
print("Few Shot F1 Score: {:.3f}".format(few_shot_f1))

In [None]:
# Is there a connection between time and correct predictions?

# Parse the date column and derive the calendar year of every row
abs_summary['date'] = pd.to_datetime(abs_summary['date'])
abs_summary['year'] = abs_summary['date'].dt.year

# Rows per year whose zero-shot GPT label equals the true label
correct_predictions = (
    abs_summary[abs_summary['Change Category'] == abs_summary['zero_shot_gpt']]
    .groupby('year')
    .size()
    .reset_index(name='correct_predictions')
)

# All rows per year
total_predictions = abs_summary.groupby('year').size().reset_index(name='total_predictions')

# The left merge keeps every year. A year without a single correct prediction is
# missing from `correct_predictions` and would come out as NaN, so count it as 0.
analysis_df = pd.merge(total_predictions, correct_predictions, on='year', how='left')
analysis_df['correct_predictions'] = analysis_df['correct_predictions'].fillna(0).astype(int)

# Share of correct predictions per year
analysis_df['correct_proportion'] = analysis_df['correct_predictions'] / analysis_df['total_predictions']


In [None]:
# Rows per year whose GPT label equals the true label, for each mode.
# Relies on the 'year' column already being present in abs_summary.
correct_predictions_zero_shot = abs_summary[abs_summary['Change Category'] == abs_summary['zero_shot_gpt']].groupby('year').size().reset_index(name='correct_predictions_zero_shot')
correct_predictions_few_shot = abs_summary[abs_summary['Change Category'] == abs_summary['few_shot_gpt']].groupby('year').size().reset_index(name='correct_predictions_few_shot')

# Start from the total row count per year and attach the zero-shot counts
total_predictions_zero_shot = abs_summary.groupby('year').size().reset_index(name='total_predictions')
analysis_df = pd.merge(total_predictions_zero_shot, correct_predictions_zero_shot, on='year', how='left')

# Attach the few-shot counts. `total_predictions_few_shot` is computed but not merged:
# both modes share the same per-year total stored in 'total_predictions'.
total_predictions_few_shot = abs_summary.groupby('year').size().reset_index(name='total_predictions_few_shot')
analysis_df = pd.merge(analysis_df, correct_predictions_few_shot, on='year', how='left')

# Left merges leave NaN for a year with no correct prediction; that is a count of 0
count_columns = ['correct_predictions_zero_shot', 'correct_predictions_few_shot']
analysis_df[count_columns] = analysis_df[count_columns].fillna(0).astype(int)

# Share of correct predictions per year for each mode
analysis_df['zero_shot_correct_proportion'] = analysis_df['correct_predictions_zero_shot'] / analysis_df['total_predictions']
analysis_df['few_shot_correct_proportion'] = analysis_df['correct_predictions_few_shot'] / analysis_df['total_predictions']

In [None]:
# Yearly share of correct GPT predictions, one line per mode
fig, ax = plt.subplots(figsize=(10, 6))
for proportion_column, line_label in (
    ('zero_shot_correct_proportion', 'Zero Shot GPT'),
    ('few_shot_correct_proportion', 'Few Shot GPT'),
):
    ax.plot(analysis_df['year'], analysis_df[proportion_column], marker='o', linestyle='-', label=line_label)
ax.set_title('Proportion of Correct Predictions Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Accuracy')
ax.grid(True)
# One tick per year present in the data
ax.set_xticks(analysis_df['year'])
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Number of rows per distinct zero-shot GPT label; the bare name on the
# last line displays the resulting Series as the cell output
category_counts  = abs_summary.groupby('zero_shot_gpt').size()
category_counts

In [None]:
# Bar chart of the label counts held in `category_counts`, with horizontal tick labels
ax = category_counts.plot.bar(color=['royalblue', 'midnightblue', 'slategrey'], rot=0)
ax.set_title('Predicted Categories: zero-shot mode GPT')
ax.set_xlabel('Label')
ax.set_ylabel('Counts')
plt.show()

In [None]:
# Confusion matrix for the zero-shot GPT predictions
actual_labels = abs_summary['Change Category']
predicted_labels = abs_summary['zero_shot_gpt']

# Shared, sorted label order for both axes
unique_labels = sorted(set(actual_labels).union(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Draw the annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('Actual Labels')
ax.set_title('Confusion Matrix zero shot GPT')
plt.show()

In [None]:
# Confusion matrix for the few-shot GPT predictions
actual_labels = abs_summary['Change Category']
predicted_labels = abs_summary['few_shot_gpt']

# Shared, sorted label order for both axes
unique_labels = sorted(set(actual_labels).union(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Draw the annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('Actual Labels')
ax.set_title('Confusion Matrix few shot GPT')
plt.show()

## Extractive summary results analysis

In [None]:
# Load the table analysed in this section. Later cells read its 'Change Category',
# 'zero_shot', 'few_shot' and 'date' columns.
ext_summary = pd.read_csv('ext_summary.csv')

In [None]:
# Canonical label -> raw GPT answers that should be rewritten to it
replacements = {
    'positive': ['Positive', 'positive', 'Positive.', '1)'],
    'negative': ['Negative', 'negative', '2)'],
    'neutral': ['Neutral', 'neutral', 'Neutral.', '3)', 'neutral.', ' neutral', 'neut']}

# Normalise the GPT answers of the extractive-summary table in place.
# The columns are read from ext_summary itself: reading them from abs_summary
# replaced the extractive GPT results with the abstractive ones, and restarting
# from the source column on every iteration kept only the last mapping.
# NOTE(review): assumes ext_summary.csv already contains 'gpt_zero_shot' and
# 'gpt_few_shot' columns — confirm against the file.
for key, values in replacements.items():
    ext_summary['gpt_zero_shot'] = ext_summary['gpt_zero_shot'].replace(values, key)
    ext_summary['gpt_few_shot'] = ext_summary['gpt_few_shot'].replace(values, key)

### Flan T5 results for extractive summary

In [None]:
# Count the rows where the zero-shot and few-shot Flan T5 labels disagree
different_values = ext_summary['zero_shot'].ne(ext_summary['few_shot']).sum()
print("Number of different values between 'zero_shot' and 'few_shot':", different_values)

In [None]:
# Flag, per row, whether each Flan T5 prediction equals the true label
ext_summary['zero_shot_correct'] = ext_summary['Change Category'].eq(ext_summary['zero_shot'])
ext_summary['few_shot_correct'] = ext_summary['Change Category'].eq(ext_summary['few_shot'])

# Accuracy in percent = share of rows flagged as correct
zero_shot_accuracy = 100 * ext_summary['zero_shot_correct'].mean()
few_shot_accuracy = 100 * ext_summary['few_shot_correct'].mean()

# Report both accuracies
print("Zero Shot Accuracy: {:.2f}%".format(zero_shot_accuracy))
print("Few Shot Accuracy: {:.2f}%".format(few_shot_accuracy))

In [None]:
# Macro F1 of the Flan T5 zero-shot and few-shot predictions on the extractive summaries
zero_shot_f1, few_shot_f1 = (
    compute_macro_f1(ext_summary, prediction_column)
    for prediction_column in ('zero_shot', 'few_shot')
)

print("Zero Shot F1 Score: {:.3f}".format(zero_shot_f1))
print("Few Shot F1 Score: {:.3f}".format(few_shot_f1))

In [None]:
# Confusion matrix for the zero-shot Flan T5 predictions (extractive summary)
actual_labels = ext_summary['Change Category']
predicted_labels = ext_summary['zero_shot']

# Shared, sorted label order for both axes
unique_labels = sorted(set(actual_labels).union(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Draw the annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('Actual Labels')
ax.set_title('Confusion Matrix zero shot Flan T5 for extractive summary')
plt.show()

In [None]:
# Confusion matrix for the few-shot Flan T5 predictions (extractive summary)
actual_labels = ext_summary['Change Category']
predicted_labels = ext_summary['few_shot']

# Shared, sorted label order for both axes
unique_labels = sorted(set(actual_labels).union(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Draw the annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('Actual Labels')
ax.set_title('Confusion Matrix few shot Flan T5 for extractive summary')
plt.show()

In [None]:
# Is there a connection between time and correct predictions?

# Parse the date column and derive the calendar year of every row
ext_summary['date'] = pd.to_datetime(ext_summary['date'])
ext_summary['year'] = ext_summary['date'].dt.year

# Rows per year whose zero-shot Flan T5 label equals the true label
correct_predictions = (
    ext_summary[ext_summary['Change Category'] == ext_summary['zero_shot']]
    .groupby('year')
    .size()
    .reset_index(name='correct_predictions')
)

# All rows per year
total_predictions = ext_summary.groupby('year').size().reset_index(name='total_predictions')

# The left merge keeps every year. A year without a single correct prediction is
# missing from `correct_predictions` and would come out as NaN, so count it as 0.
analysis_df = pd.merge(total_predictions, correct_predictions, on='year', how='left')
analysis_df['correct_predictions'] = analysis_df['correct_predictions'].fillna(0).astype(int)

# Share of correct predictions per year
analysis_df['correct_proportion'] = analysis_df['correct_predictions'] / analysis_df['total_predictions']

In [None]:
# Rows per year whose Flan T5 label equals the true label, for each mode.
# Relies on the 'year' column already being present in ext_summary.
correct_predictions_zero_shot = ext_summary[ext_summary['Change Category'] == ext_summary['zero_shot']].groupby('year').size().reset_index(name='correct_predictions_zero_shot')
correct_predictions_few_shot = ext_summary[ext_summary['Change Category'] == ext_summary['few_shot']].groupby('year').size().reset_index(name='correct_predictions_few_shot')

# Start from the total row count per year and attach the zero-shot counts
total_predictions_zero_shot = ext_summary.groupby('year').size().reset_index(name='total_predictions')
analysis_df = pd.merge(total_predictions_zero_shot, correct_predictions_zero_shot, on='year', how='left')

# Attach the few-shot counts. `total_predictions_few_shot` is computed but not merged:
# both modes share the same per-year total stored in 'total_predictions'.
total_predictions_few_shot = ext_summary.groupby('year').size().reset_index(name='total_predictions_few_shot')
analysis_df = pd.merge(analysis_df, correct_predictions_few_shot, on='year', how='left')

# Left merges leave NaN for a year with no correct prediction; that is a count of 0
count_columns = ['correct_predictions_zero_shot', 'correct_predictions_few_shot']
analysis_df[count_columns] = analysis_df[count_columns].fillna(0).astype(int)

# Share of correct predictions per year for each mode
analysis_df['zero_shot_correct_proportion'] = analysis_df['correct_predictions_zero_shot'] / analysis_df['total_predictions']
analysis_df['few_shot_correct_proportion'] = analysis_df['correct_predictions_few_shot'] / analysis_df['total_predictions']

In [None]:
# Yearly share of correct Flan T5 predictions on the extractive summaries, one line per mode
fig, ax = plt.subplots(figsize=(10, 6))
for proportion_column, line_label in (
    ('zero_shot_correct_proportion', 'Zero Shot Flant5 Large for extractive summary'),
    ('few_shot_correct_proportion', 'Few Shot Flant5 Large for extractive summary'),
):
    ax.plot(analysis_df['year'], analysis_df[proportion_column], marker='o', linestyle='-', label=line_label)
ax.set_title('Proportion of Correct Predictions Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Accuracy')
ax.grid(True)
# One tick per year present in the data
ax.set_xticks(analysis_df['year'])
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Number of rows per distinct zero-shot Flan T5 label in ext_summary; the bare
# name on the last line displays the resulting Series as the cell output
category_counts  = ext_summary.groupby('zero_shot').size()
category_counts

In [None]:
# Bar chart of the label counts held in `category_counts`, with horizontal tick labels
ax = category_counts.plot.bar(color=['royalblue', 'midnightblue', 'slategrey'], rot=0)
ax.set_title('Predicted Categories: zero-shot mode Flan T5 for extractive summary')
ax.set_xlabel('Label')
ax.set_ylabel('Counts')
plt.show()

### ChatGPT results for extractive summary

In [None]:
# Count the rows where the zero-shot and few-shot GPT labels disagree
different_values = ext_summary['gpt_zero_shot'].ne(ext_summary['gpt_few_shot']).sum()
print("Number of different values between 'zero_shot' and 'few_shot':", different_values)

In [None]:
# Flag, per row, whether each GPT prediction equals the true label.
# These assignments overwrite the 'zero_shot_correct' / 'few_shot_correct' columns.
ext_summary['zero_shot_correct'] = ext_summary['Change Category'].eq(ext_summary['gpt_zero_shot'])
ext_summary['few_shot_correct'] = ext_summary['Change Category'].eq(ext_summary['gpt_few_shot'])

# Accuracy in percent = share of rows flagged as correct
zero_shot_accuracy = 100 * ext_summary['zero_shot_correct'].mean()
few_shot_accuracy = 100 * ext_summary['few_shot_correct'].mean()

# Report both accuracies
print("Zero Shot Accuracy: {:.2f}%".format(zero_shot_accuracy))
print("Few Shot Accuracy: {:.2f}%".format(few_shot_accuracy))

In [None]:
# Macro F1 of the GPT zero-shot and few-shot predictions on the extractive summaries
zero_shot_f1, few_shot_f1 = (
    compute_macro_f1(ext_summary, prediction_column)
    for prediction_column in ('gpt_zero_shot', 'gpt_few_shot')
)

print("Zero Shot F1 Score: {:.3f}".format(zero_shot_f1))
print("Few Shot F1 Score: {:.3f}".format(few_shot_f1))

In [None]:
# Confusion matrix for the zero-shot GPT predictions (extractive summary)
actual_labels = ext_summary['Change Category']
predicted_labels = ext_summary['gpt_zero_shot']

# Shared, sorted label order for both axes
unique_labels = sorted(set(actual_labels).union(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Draw the annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('Actual Labels')
ax.set_title('Confusion Matrix zero shot GPT for extractive summary')
plt.show()

In [None]:
# Confusion matrix for the few-shot GPT predictions (extractive summary)
actual_labels = ext_summary['Change Category']
predicted_labels = ext_summary['gpt_few_shot']

# Shared, sorted label order for both axes
unique_labels = sorted(set(actual_labels).union(predicted_labels))

# Rows are actual labels, columns are predicted labels
cm = confusion_matrix(actual_labels, predicted_labels, labels=unique_labels)

# Draw the annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('Actual Labels')
ax.set_title('Confusion Matrix few shot GPT for extractive summary')
plt.show()

In [None]:
# Is there a connection between time and correct predictions?

# Parse the date column and derive the calendar year of every row
ext_summary['date'] = pd.to_datetime(ext_summary['date'])
ext_summary['year'] = ext_summary['date'].dt.year

# Rows per year whose zero-shot GPT label equals the true label
correct_predictions = (
    ext_summary[ext_summary['Change Category'] == ext_summary['gpt_zero_shot']]
    .groupby('year')
    .size()
    .reset_index(name='correct_predictions')
)

# All rows per year
total_predictions = ext_summary.groupby('year').size().reset_index(name='total_predictions')

# The left merge keeps every year. A year without a single correct prediction is
# missing from `correct_predictions` and would come out as NaN, so count it as 0.
analysis_df = pd.merge(total_predictions, correct_predictions, on='year', how='left')
analysis_df['correct_predictions'] = analysis_df['correct_predictions'].fillna(0).astype(int)

# Share of correct predictions per year
analysis_df['correct_proportion'] = analysis_df['correct_predictions'] / analysis_df['total_predictions']

In [None]:
# Rows per year whose GPT label equals the true label, for each mode.
# Relies on the 'year' column already being present in ext_summary.
correct_predictions_zero_shot = ext_summary[ext_summary['Change Category'] == ext_summary['gpt_zero_shot']].groupby('year').size().reset_index(name='correct_predictions_zero_shot')
correct_predictions_few_shot = ext_summary[ext_summary['Change Category'] == ext_summary['gpt_few_shot']].groupby('year').size().reset_index(name='correct_predictions_few_shot')

# Start from the total row count per year and attach the zero-shot counts
total_predictions_zero_shot = ext_summary.groupby('year').size().reset_index(name='total_predictions')
analysis_df = pd.merge(total_predictions_zero_shot, correct_predictions_zero_shot, on='year', how='left')

# Attach the few-shot counts. `total_predictions_few_shot` is computed but not merged:
# both modes share the same per-year total stored in 'total_predictions'.
total_predictions_few_shot = ext_summary.groupby('year').size().reset_index(name='total_predictions_few_shot')
analysis_df = pd.merge(analysis_df, correct_predictions_few_shot, on='year', how='left')

# Left merges leave NaN for a year with no correct prediction; that is a count of 0
count_columns = ['correct_predictions_zero_shot', 'correct_predictions_few_shot']
analysis_df[count_columns] = analysis_df[count_columns].fillna(0).astype(int)

# Share of correct predictions per year for each mode
analysis_df['zero_shot_correct_proportion'] = analysis_df['correct_predictions_zero_shot'] / analysis_df['total_predictions']
analysis_df['few_shot_correct_proportion'] = analysis_df['correct_predictions_few_shot'] / analysis_df['total_predictions']

In [None]:
# Yearly share of correct GPT predictions on the extractive summaries, one line per mode
fig, ax = plt.subplots(figsize=(10, 6))
for proportion_column, line_label in (
    ('zero_shot_correct_proportion', 'Zero Shot GPT for extractive summary'),
    ('few_shot_correct_proportion', 'Few Shot GPT for extractive summary'),
):
    ax.plot(analysis_df['year'], analysis_df[proportion_column], marker='o', linestyle='-', label=line_label)
ax.set_title('Proportion of Correct Predictions Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Accuracy')
ax.grid(True)
# One tick per year present in the data
ax.set_xticks(analysis_df['year'])
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Number of rows per distinct zero-shot GPT label in ext_summary; the bare
# name on the last line displays the resulting Series as the cell output
category_counts  = ext_summary.groupby('gpt_zero_shot').size()
category_counts

In [None]:
# Bar chart of the label counts held in `category_counts`, with horizontal tick labels
ax = category_counts.plot.bar(color=['royalblue', 'midnightblue', 'slategrey'], rot=0)
ax.set_title('Predicted Categories: zero-shot mode GPT for extractive summary')
ax.set_xlabel('Label')
ax.set_ylabel('Counts')
plt.show()