### Finetuning Dataset Preparation

In [1]:
import os

import pandas as pd
from openai import OpenAI
from tqdm import tqdm

In [None]:
# Read the master workbook from the notebook's working directory.
df = pd.read_excel(io="Master_Dataset_v15.xlsx")

In [8]:
# Keep only the headline column and drop rows without a title.
# The result is built as a new frame instead of calling `dropna(inplace=True)` on a
# derived frame, and the index is renumbered so that positional loops of the form
# `df.Title[i] for i in range(len(df))` never hit a label that dropna removed.
df = df[["Title"]].dropna().reset_index(drop=True)
df.head()

Unnamed: 0,Title
0,UBS: Ospel verzichtet auf über 20 Millionen Fr...
1,Danuser im Dienst der Deutschen Bank - finews.ch
2,Banque Bonhôte als Krisengewinnerin - finews.ch
3,Bénédict Hentsch trennt sich von Hedge-Fund - ...
4,Helvetia: Vierzehn neue Mitglieder der Direkti...


### GPT Labelling

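Reference: Gilardi, Alizadeh & Kubli (2023), *ChatGPT Outperforms Crowd-Workers for Text-Annotation Tasks* — https://arxiv.org/pdf/2303.15056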

In [None]:
# The API key is read from the OPENAI_API_KEY environment variable so that no
# credential is ever typed into (or committed with) the notebook.
# A missing variable raises KeyError here instead of failing later on the first request.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [4]:
# One-off initialisation, kept disabled: enabling it sets every row back to
# "pending", the marker the labelling loop uses for "not labelled yet".
#df["GPT3_5_label"] = "pending"
df.head(5)

Unnamed: 0,Title,GPT3_5_label
0,UBS: Ospel verzichtet auf über 20 Millionen Fr...,SELL
1,Danuser im Dienst der Deutschen Bank - finews.ch,HOLD
2,Banque Bonhôte als Krisengewinnerin - finews.ch,BUY
3,Bénédict Hentsch trennt sich von Hedge-Fund - ...,SELL
4,Helvetia: Vierzehn neue Mitglieder der Direkti...,HOLD


In [3]:
# Load the workbook holding the titles and their GPT3_5_label column.
df = pd.read_excel(io="finetuning_gpt_labelled_v22000.xlsx")

In [17]:
# Label every headline that is still "pending" with GPT-3.5 (BUY / HOLD / SELL).
#
# The system and user prompts are the same for every row, so they are built once
# instead of on each iteration. Their text is kept verbatim: rewording the prompt
# would change the labels the model returns.
role = """
                You are a helpful financial analyst. I will provide you with a financial news article title in german and I want you to read it and give me your sentiment about it.
                    """

instruction= """
            I will provide you with a financial news article titles in german and I want you to read it and give me your sentiment about it. You should
            express if the provided information is positive, negative or neutral for the swiss stock market. If you think that the information is positive
            and the stock market will likely go up, I want your answer be BUY. If you think that the information is negative and the stock market will likely go down,
            I want your answer be SELL. If you think that the information is neutral and the stock market will likely stay the same, I want your answer be HOLD.
            In the case the information is not really relevant for the stock market, I want your answer to be HOLD.

            I'am going to use this information to cluster the news articles and construct a weekly sentiment index for the swiss stock market.

            I want your answer be exacly one of the following options, just one word, dont add anything else:

            BUY, HOLD, SELL
                        """

valid_labels = {"BUY", "HOLD", "SELL"}

for i in tqdm(range(len(df.Title))):
    # Rows that already carry a label are skipped, so an interrupted run can be resumed.
    if df.GPT3_5_label[i] != "pending":
        continue

    try:
        article_text = df.Title[i]
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            max_tokens=2,
            temperature=0.0,
            messages=[{"role": "system", "content": role},{"role": "user", "content": instruction+" "+article_text}])
        # The content may be None; treat that like any other unusable answer.
        answer = (completion.choices[0].message.content or "").strip().upper()
    except Exception as exc:
        # Best effort: a failed request leaves the row "pending" for a later pass, but the
        # failure is reported. `Exception` (not a bare except) keeps Ctrl-C working.
        tqdm.write(f"Row {i}: GPT labelling failed ({exc!r}); row left as 'pending'")
        continue

    # Answers are stored upper-cased so "Buy"/"sell" end up as the canonical
    # BUY / HOLD / SELL values that the evaluation cells map to integers.
    if answer in valid_labels:
        df.loc[i, "GPT3_5_label"] = answer

100%|██████████| 27956/27956 [00:00<00:00, 156207.36it/s]


In [18]:
# Persist the labelled titles; the row index is not written to the file.
df.to_excel(excel_writer="finetuning_gpt_labelled_vfinal.xlsx", index=False)

In [19]:
# Class distribution of the GPT-assigned labels.
label_counts = df.GPT3_5_label.value_counts()
print(label_counts)

GPT3_5_label
HOLD    14403
SELL     8076
BUY      5477
Name: count, dtype: int64


In [20]:
from sklearn.utils import resample

# One sub-frame per sentiment class.
gpt_labels = df['GPT3_5_label']
df_buy = df.loc[gpt_labels == 'BUY']
df_hold = df.loc[gpt_labels == 'HOLD']
df_sell = df.loc[gpt_labels == 'SELL']

# The rarest class fixes the size that every class is cut down to.
min_count = min(map(len, (df_buy, df_hold, df_sell)))

# Downsample each class without replacement; the fixed seed makes the draw repeatable.
downsample_options = dict(replace=False, n_samples=min_count, random_state=42)
df_buy_balanced = resample(df_buy, **downsample_options)
df_hold_balanced = resample(df_hold, **downsample_options)
df_sell_balanced = resample(df_sell, **downsample_options)

# Stack the three equally sized classes, shuffle the rows and renumber the index.
df_balanced = (
    pd.concat([df_buy_balanced, df_hold_balanced, df_sell_balanced])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Show the class distribution after balancing.
balanced_counts = df_balanced['GPT3_5_label'].value_counts()
print(balanced_counts)

GPT3_5_label
HOLD    5477
BUY     5477
SELL    5477
Name: count, dtype: int64


In [21]:
# Persist the balanced dataset; the row index is not written to the file.
df_balanced.to_excel(excel_writer="finetuning_gpt_labelled_vfinal_balanced.xlsx", index=False)

### Performance Test

In [None]:
# Reload the balanced, GPT-labelled titles that serve as the evaluation set.
df = pd.read_excel(io="finetuning_gpt_labelled_vfinal_balanced.xlsx")

In [3]:
# Preview the first five rows of the evaluation set.
df.head(5)

Unnamed: 0,Title,GPT3_5_label
0,Privatinseln: Wo Banker ihren Bonus elegant in...,HOLD
1,Credit Suisse hat Lücke in der Unternehmerbank...,BUY
2,Weiterer Vermögensverwalter mit Finma-Lizenz -...,HOLD
3,Jordan: «Zentralbanken können nicht jedes Prob...,HOLD
4,Eklat bei BLKB-Tochter Radicant - finews.ch,SELL


In [None]:
# Build a Hugging Face text-classification pipeline for SwissFinBERT.
from transformers import pipeline

# device=0 places the model on the first accelerator device.
pipe = pipeline(task="text-classification", model="AlGatone21/SwissFinBERT", device=0)

In [6]:
# Every row starts as "pending" until the classifier has produced a label for it.
df = df.assign(SwissFinBERT_label="pending")

In [7]:
# Classify every headline with SwissFinBERT and store the predicted label.
for row in tqdm(range(len(df.Title))):
    try:
        result = pipe(df.Title[row])
        df.loc[row, "SwissFinBERT_label"] = result[0]["label"]
    except Exception as exc:
        # Best effort: a failing row keeps its "pending" placeholder, but the failure
        # is reported instead of being swallowed. Catching `Exception` rather than
        # using a bare except lets Ctrl-C stop the loop.
        tqdm.write(f"Row {row}: SwissFinBERT failed ({exc!r}); row left as 'pending'")

  0%|          | 1/16431 [00:00<1:28:35,  3.09it/s]--- Logging error ---
Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)


In [7]:
# Preview the first five rows, now including the SwissFinBERT predictions.
df.head(5)

Unnamed: 0,Title,GPT3_5_label,SwissFinBERT_label
0,Privatinseln: Wo Banker ihren Bonus elegant in...,HOLD,HOLD
1,Credit Suisse hat Lücke in der Unternehmerbank...,BUY,BUY
2,Weiterer Vermögensverwalter mit Finma-Lizenz -...,HOLD,BUY
3,Jordan: «Zentralbanken können nicht jedes Prob...,HOLD,SELL
4,Eklat bei BLKB-Tochter Radicant - finews.ch,SELL,SELL


In [8]:
from transformers import pipeline

# Text-classification pipeline for GermanFinBert; device=0 selects the first accelerator device.
sent_pipeline = pipeline(
    task="text-classification",
    model="scherrmann/GermanFinBert_SC_Sentiment",
    device=0,
)

In [9]:
# Every row starts as "pending" until the classifier has produced a label for it.
df = df.assign(GermanFinBERT_label="pending")

In [10]:
# Classify every headline with the current `sent_pipeline` and store the label
# in the GermanFinBERT column.
for row in tqdm(range(len(df.Title))):
    try:
        result = sent_pipeline(df.Title[row])
        df.loc[row, "GermanFinBERT_label"] = result[0]["label"]
    except Exception as exc:
        # Best effort: a failing row keeps its "pending" placeholder, but the failure
        # is reported instead of being swallowed. Catching `Exception` rather than
        # using a bare except lets Ctrl-C stop the loop.
        tqdm.write(f"Row {row}: GermanFinBERT failed ({exc!r}); row left as 'pending'")

100%|██████████| 16431/16431 [02:05<00:00, 130.54it/s]


In [11]:
from transformers import pipeline

# Rebinds `sent_pipeline` to the general-purpose German sentiment model;
# device=0 selects the first accelerator device.
sent_pipeline = pipeline(
    task="sentiment-analysis",
    model="oliverguhr/german-sentiment-bert",
    device=0,
)

In [12]:
# Every row starts as "pending" until the classifier has produced a label for it.
df = df.assign(GermanSentimentBERT_label="pending")

In [13]:
# Classify every headline with the current `sent_pipeline` and store the label
# in the GermanSentimentBERT column.
for row in tqdm(range(len(df.Title))):
    try:
        result = sent_pipeline(df.Title[row])
        df.loc[row, "GermanSentimentBERT_label"] = result[0]["label"]
    except Exception as exc:
        # Best effort: a failing row keeps its "pending" placeholder, but the failure
        # is reported instead of being swallowed. Catching `Exception` rather than
        # using a bare except lets Ctrl-C stop the loop.
        tqdm.write(f"Row {row}: GermanSentimentBERT failed ({exc!r}); row left as 'pending'")

100%|██████████| 16431/16431 [02:01<00:00, 135.33it/s]


In [14]:
# Tally how often each labeller produced each label.
gpt3_5_label_counts = df.GPT3_5_label.value_counts()
swiss_fin_bert_label_counts = df.SwissFinBERT_label.value_counts()
german_fin_bert_label_counts = df.GermanFinBERT_label.value_counts()
german_sentiment_bert_label_counts = df.GermanSentimentBERT_label.value_counts()

# Print one heading per labeller followed by its tally.
count_report = (
    ("GPT3_5_label counts:\n", gpt3_5_label_counts),
    ("\nSwissFinBERT_label counts:\n", swiss_fin_bert_label_counts),
    ("\nGermanFinBERT_label counts:\n", german_fin_bert_label_counts),
    ("\nGermanSentimentBERT_label counts:\n", german_sentiment_bert_label_counts),
)
for heading, counts in count_report:
    print(heading, counts)

GPT3_5_label counts:
 GPT3_5_label
HOLD    5477
BUY     5477
SELL    5477
Name: count, dtype: int64

SwissFinBERT_label counts:
 SwissFinBERT_label
SELL    5518
HOLD    5467
BUY     5446
Name: count, dtype: int64

GermanFinBERT_label counts:
 GermanFinBERT_label
Neutral    13223
Negativ     1645
Positiv     1563
Name: count, dtype: int64

GermanSentimentBERT_label counts:
 GermanSentimentBERT_label
neutral     13276
negative     2617
positive      538
Name: count, dtype: int64


In [16]:
# Map every model's textual label onto a shared integer scale: 0 = negative/SELL,
# 1 = neutral/HOLD, 2 = positive/BUY.
# NOTE(review): "pending" (rows a model produced no label for) is scored as 1 / neutral —
# confirm this is intended, as it counts failures as HOLD predictions.
label_matchings = { "BUY" : 2, "HOLD" : 1, "SELL" : 0, "positive" : 2, "neutral" : 1, "negative" : 0, "pending" : 1, "Negativ" : 0, "Neutral" : 1, "Positiv" : 2}

# Case-insensitive lookup: the labelling cells accept answers such as "Buy" or "sell",
# which the case-sensitive dict would silently turn into NaN.
label_lookup = {name.lower(): code for name, code in label_matchings.items()}

for column in ["GermanSentimentBERT_label", "GermanFinBERT_label", "GPT3_5_label", "SwissFinBERT_label"]:
    if pd.api.types.is_numeric_dtype(df[column]):
        # Already encoded by an earlier run of this cell; mapping the integers
        # again would overwrite the whole column with NaN.
        continue
    encoded = df[column].astype(str).str.strip().str.lower().map(label_lookup)
    unmapped = df.loc[encoded.isna(), column].unique()
    if len(unmapped):
        # Fail here with the offending values instead of letting NaN reach the metrics.
        raise ValueError(f"Unmapped labels in {column}: {list(unmapped)}")
    df[column] = encoded.astype("int64")

df.head()

Unnamed: 0,Title,GPT3_5_label,SwissFinBERT_label,GermanFinBERT_label,GermanSentimentBERT_label
0,Privatinseln: Wo Banker ihren Bonus elegant in...,1,1,1,1
1,Credit Suisse hat Lücke in der Unternehmerbank...,2,2,1,1
2,Weiterer Vermögensverwalter mit Finma-Lizenz -...,1,1,1,0
3,Jordan: «Zentralbanken können nicht jedes Prob...,1,1,1,1
4,Eklat bei BLKB-Tochter Radicant - finews.ch,0,0,1,1


In [17]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


def _score_against_gpt(prediction_column):
    """Return (accuracy, F1, precision, recall) of a prediction column versus the GPT-3.5 labels.

    F1, precision and recall use the "weighted" average over the classes.
    """
    reference = df["GPT3_5_label"]
    predicted = df[prediction_column]
    return (
        accuracy_score(reference, predicted),
        f1_score(reference, predicted, average="weighted"),
        precision_score(reference, predicted, average="weighted"),
        recall_score(reference, predicted, average="weighted"),
    )


# Scores per model; the individual names are kept because later cells read them.
(swiss_fin_bert_accuracy, swiss_fin_bert_f1,
 swiss_fin_bert_precision, swiss_fin_bert_recall) = _score_against_gpt("SwissFinBERT_label")

(german_fin_bert_accuracy, german_fin_bert_f1,
 german_fin_bert_precision, german_fin_bert_recall) = _score_against_gpt("GermanFinBERT_label")

(german_sentiment_bert_accuracy, german_sentiment_bert_f1,
 german_sentiment_bert_precision, german_sentiment_bert_recall) = _score_against_gpt("GermanSentimentBERT_label")

# Print the four scores of every model, one line per score.
score_report = (
    ("SwissFinBERT", (swiss_fin_bert_accuracy, swiss_fin_bert_f1,
                      swiss_fin_bert_precision, swiss_fin_bert_recall)),
    ("GermanFinBERT", (german_fin_bert_accuracy, german_fin_bert_f1,
                       german_fin_bert_precision, german_fin_bert_recall)),
    ("GermanSentimentBERT", (german_sentiment_bert_accuracy, german_sentiment_bert_f1,
                             german_sentiment_bert_precision, german_sentiment_bert_recall)),
)
for model_name, scores in score_report:
    for metric_name, value in zip(("Accuracy", "F1 Score", "Precision", "Recall"), scores):
        print(f"{model_name} {metric_name}:", value)


SwissFinBERT Accuracy: 0.8662284705739152
SwissFinBERT F1 Score: 0.8661890086565933
SwissFinBERT Precision: 0.8661630180498723
SwissFinBERT Recall: 0.8662284705739152
GermanFinBERT Accuracy: 0.47945955815227315
GermanFinBERT F1 Score: 0.43841002635002585
GermanFinBERT Precision: 0.6900440248156885
GermanFinBERT Recall: 0.47945955815227315
GermanSentimentBERT Accuracy: 0.3277950216055018
GermanSentimentBERT F1 Score: 0.25057184326910587
GermanSentimentBERT Precision: 0.3494742401281821
GermanSentimentBERT Recall: 0.3277950216055018


In [18]:
# Collect the scores into one table: a column per model, a row per metric.
metric_names = ["Accuracy", "F1 Score", "Precision", "Recall"]
figures_df = pd.DataFrame(
    {
        "SwissFinBERT": [swiss_fin_bert_accuracy, swiss_fin_bert_f1,
                         swiss_fin_bert_precision, swiss_fin_bert_recall],
        "GermanFinBERT": [german_fin_bert_accuracy, german_fin_bert_f1,
                          german_fin_bert_precision, german_fin_bert_recall],
        "GermanSentimentBERT": [german_sentiment_bert_accuracy, german_sentiment_bert_f1,
                                german_sentiment_bert_precision, german_sentiment_bert_recall],
    },
    index=metric_names,
)
figures_df

Unnamed: 0,SwissFinBERT,GermanFinBERT,GermanSentimentBERT
Accuracy,0.866228,0.47946,0.327795
F1 Score,0.866189,0.43841,0.250572
Precision,0.866163,0.690044,0.349474
Recall,0.866228,0.47946,0.327795


In [19]:
# The metric names ("Accuracy", "F1 Score", ...) live in the index, so the index must
# be written; with index=False the sheet held four unlabelled rows of numbers.
figures_df.to_excel("model_performance_figures_v2.xlsx", index=True, index_label="Metric")

### Financial Phrasebank Test

In [2]:
from datasets import load_dataset

# Load the German Financial PhraseBank (75% agreement) dataset by its hub identifier.
ds = load_dataset(path="scherrmann/financial_phrasebank_75agree_german")

In [3]:
# Convert each dataset split into its own pandas frame.
train_df, validation_df, test_df = (
    ds[split_name].to_pandas() for split_name in ("train", "validation", "test")
)

# Preview the first rows of every split, in train / validation / test order.
for split_frame in (train_df, validation_df, test_df):
    print(split_frame.head())

                                            sentence  label
0  ``Die rasch steigenden Kosten und die Stärkung...      0
1  Die finnische nationale Fluggesellschaft gab a...      0
2  Der Betriebsgewinn fiel von EUR 7,9 Mio. im zw...      0
3  Der Gewinn für den Berichtszeitraum betrug 10,...      0
4  Raute meldete für das erste Halbjahr 2009 eine...      0
                                            sentence  label
0  Stora Enso Oyj sagte, das Ergebnis des zweiten...      0
1  Kleinanleger haben die Befürchtung geäußert, d...      0
2  ``Diese Unsicherheiten trüben die langfristige...      0
3  Die verschärfte Wettbewerbssituation auf dem M...      0
4  Der Betriebsgewinn in der ersten Hälfte betrug...      0
                                            sentence  label
0  Im letzten Quartal 2010 verdoppelte sich der N...      2
1  Der Abonnentenstamm von MegaFon stieg 2009 um ...      2
2  Der litauische Biermarkt machte im Januar 14,4...      2
3  Die Provisionseinnahmen stiegen um 22

In [4]:
# Stack the three splits into a single frame with a fresh 0..n-1 index.
all_splits = [train_df, validation_df, test_df]
df = pd.concat(objs=all_splits, ignore_index=True)

# Preview the combined frame.
print(df.head(5))


                                            sentence  label
0  ``Die rasch steigenden Kosten und die Stärkung...      0
1  Die finnische nationale Fluggesellschaft gab a...      0
2  Der Betriebsgewinn fiel von EUR 7,9 Mio. im zw...      0
3  Der Gewinn für den Berichtszeitraum betrug 10,...      0
4  Raute meldete für das erste Halbjahr 2009 eine...      0


In [5]:
# One Hugging Face pipeline per model under comparison; device=0 places each on the
# first accelerator device.
from transformers import pipeline

pipe = pipeline(task="text-classification", model="AlGatone21/SwissFinBERT_v7", device=0)
pipe2 = pipeline(task="text-classification", model="scherrmann/GermanFinBert_SC_Sentiment", device=0)
pipe3 = pipeline(task="sentiment-analysis", model="oliverguhr/german-sentiment-bert", device=0)

In [6]:
# All three prediction columns start as "pending" until the models have run.
df = df.assign(
    SwissFinBERT_label="pending",
    GermanFinBERT_label="pending",
    GermanSentimentBERT_label="pending",
)

In [7]:
# Classify every sentence with the three models and store each prediction in its column.
classifiers = {
    "SwissFinBERT_label": pipe,
    "GermanFinBERT_label": pipe2,
    "GermanSentimentBERT_label": pipe3,
}

for row in tqdm(range(len(df.sentence))):
    # Each model gets its own try block, so one model failing on a sentence no longer
    # discards the predictions the other two produced for that row.
    for column, classifier in classifiers.items():
        try:
            df.loc[row, column] = classifier(df.sentence[row])[0]["label"]
        except Exception as exc:
            # Best effort: the cell keeps its "pending" placeholder, but the failure is
            # reported. Catching `Exception` rather than using a bare except lets
            # Ctrl-C stop the loop.
            tqdm.write(f"Row {row}: {column} failed ({exc!r}); left as 'pending'")

  0%|          | 10/3453 [00:00<02:32, 22.64it/s]--- Logging error ---
Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  

In [8]:
# Persist the model predictions; the row index is not written to the file.
df.to_excel(excel_writer="financial_phrasebank_75agree_german_labelled_v2.xlsx", index=False)

In [2]:
# Reload the sentences together with the three models' predictions.
df = pd.read_excel(io="financial_phrasebank_75agree_german_labelled_v2.xlsx")

In [None]:
# The API key is read from the OPENAI_API_KEY environment variable so that no
# credential is ever typed into (or committed with) the notebook.
# A missing variable raises KeyError here instead of failing later on the first request.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [6]:
# Create the label column only when it is missing. The labelling loop skips rows that
# are no longer "pending", so it is meant to be resumable; an unconditional reset on a
# re-run (or on a reloaded file that already has labels) would throw them away.
if "GPT3_5_label" not in df.columns:
    df["GPT3_5_label"] = "pending"

3453

In [7]:
# Label every sentence that is still "pending" with GPT-3.5 (BUY / HOLD / SELL).
#
# The system and user prompts are the same for every row, so they are built once
# instead of on each iteration. Their text is kept verbatim: rewording the prompt
# would change the labels the model returns.
role = """
                You are a helpful financial analyst. I will provide you with a financial news article title in german and I want you to read it and give me your sentiment about it.
                    """

instruction= """
            I will provide you with a financial news article titles in german and I want you to read it and give me your sentiment about it. You should
            express if the provided information is positive, negative or neutral for the swiss stock market. If you think that the information is positive
            and the stock market will likely go up, I want your answer be BUY. If you think that the information is negative and the stock market will likely go down,
            I want your answer be SELL. If you think that the information is neutral and the stock market will likely stay the same, I want your answer be HOLD.
            In the case the information is not really relevant for the stock market, I want your answer to be HOLD.

            I'am going to use this information to cluster the news articles and construct a weekly sentiment index for the swiss stock market.

            I want your answer be exacly one of the following options, just one word, dont add anything else:

            BUY, HOLD, SELL
                        """

valid_labels = {"BUY", "HOLD", "SELL"}

for i in tqdm(range(len(df.sentence))):
    # Rows that already carry a label are skipped, so an interrupted run can be resumed.
    if df.GPT3_5_label[i] != "pending":
        continue

    try:
        article_text = df.sentence[i]
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            max_tokens=2,
            temperature=0.0,
            messages=[{"role": "system", "content": role},{"role": "user", "content": instruction+" "+article_text}])
        # The content may be None; treat that like any other unusable answer.
        answer = (completion.choices[0].message.content or "").strip().upper()
    except Exception as exc:
        # Best effort: a failed request leaves the row "pending" for a later pass, but the
        # failure is reported. `Exception` (not a bare except) keeps Ctrl-C working.
        tqdm.write(f"Row {i}: GPT labelling failed ({exc!r}); row left as 'pending'")
        continue

    # Answers are stored upper-cased so "Buy"/"sell" end up as the canonical
    # BUY / HOLD / SELL values that the evaluation cell maps to integers.
    if answer in valid_labels:
        df.loc[i, "GPT3_5_label"] = answer

100%|██████████| 3453/3453 [20:25<00:00,  2.82it/s] 


In [9]:
# Create the label column only when it is missing. The Gemini loop skips rows that are
# no longer "pending", so it is meant to be resumable; an unconditional reset on a
# re-run would throw away labels that were already collected.
if "Gemini_label" not in df.columns:
    df["Gemini_label"] = "pending"

In [None]:
import google.generativeai as genai

# The API key is read from the GOOGLE_API_KEY environment variable so that no
# credential is ever typed into (or committed with) the notebook.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Deterministic decoding settings with a two-token output cap.
generation_config = {
  "temperature": 0,
  "top_p": 1,
  "top_k": 1,
  "max_output_tokens": 2,
}

model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config,)


# Prompt text is kept verbatim: rewording it would change the labels the model returns.
role = """
You are a helpful financial analyst. I will provide you with a financial news article title in german and I want you to read it and give me your sentiment about it.
        """
instruction= """
            I will provide you with a financial news article titles in german and I want you to read it and give me your sentiment about it. You should
            express if the provided information is positive, negative or neutral for the swiss stock market. If you think that the information is positive
            and the stock market will likely go up, I want your answer be BUY. If you think that the information is negative and the stock market will likely go down,
            I want your answer be SELL. If you think that the information is neutral and the stock market will likely stay the same, I want your answer be HOLD.
            In the case the information is not really relevant for the stock market, I want your answer to be HOLD.

            I'am going to use this information to cluster the news articles and construct a weekly sentiment index for the swiss stock market.

            I want your answer be exacly one of the following options, just one word, dont add anything else:

            BUY, HOLD, SELL
            """

valid_labels = {"BUY", "HOLD", "SELL"}

for i in tqdm(range(len(df.Gemini_label))):
    # Rows that already carry a label are skipped, so an interrupted run can be resumed.
    if df.Gemini_label[i] != "pending":
        continue

    try:
        article_text = df.sentence[i]
        response = model.generate_content(f'{role} \n {instruction}\n{article_text}')
        # Reading `.text` stays inside the try block so that a response without
        # usable text is handled like any other failed request.
        answer = response.text.strip().upper()
    except Exception as exc:
        # Best effort: a failed request leaves the row "pending" for a later pass, but the
        # failure is reported. `Exception` (not a bare except) keeps Ctrl-C working.
        tqdm.write(f"Row {i}: Gemini labelling failed ({exc!r}); row left as 'pending'")
        continue

    # Answers are stored upper-cased so "Buy"/"sell" end up as the canonical
    # BUY / HOLD / SELL values that the evaluation cell maps to integers.
    if answer in valid_labels:
        df.loc[i, "Gemini_label"] = answer

100%|██████████| 3453/3453 [1:15:24<00:00,  1.31s/it]


In [11]:
# Persist all predictions, including the GPT-3.5 and Gemini labels; the row index is not written.
df.to_excel(excel_writer="financial_phrasebank_75agree_german_labelled_v4.xlsx", index=False)

In [12]:
# Reload the fully labelled sentences for evaluation.
df = pd.read_excel(io="financial_phrasebank_75agree_german_labelled_v4.xlsx")

In [13]:
# Map every model's textual label onto a shared integer scale: 0 = negative/SELL,
# 1 = neutral/HOLD, 2 = positive/BUY.
# NOTE(review): "pending" (rows a model produced no label for) is scored as 1 / neutral —
# confirm this is intended, as it counts failures as HOLD predictions.
label_matchings = { "BUY" : 2, "HOLD" : 1, "SELL" : 0, "positive" : 2, "neutral" : 1, "negative" : 0, "pending" : 1, "Negativ" : 0, "Neutral" : 1, "Positiv" : 2}

# Case-insensitive lookup: the labelling cells accept answers such as "Buy" or "sell",
# which the case-sensitive dict would silently turn into NaN.
label_lookup = {name.lower(): code for name, code in label_matchings.items()}

for column in ["GermanSentimentBERT_label", "GermanFinBERT_label", "SwissFinBERT_label", "GPT3_5_label", "Gemini_label"]:
    if pd.api.types.is_numeric_dtype(df[column]):
        # Already encoded by an earlier run of this cell; mapping the integers
        # again would overwrite the whole column with NaN.
        continue
    encoded = df[column].astype(str).str.strip().str.lower().map(label_lookup)
    unmapped = df.loc[encoded.isna(), column].unique()
    if len(unmapped):
        # Fail here with the offending values instead of letting NaN reach the metrics.
        raise ValueError(f"Unmapped labels in {column}: {list(unmapped)}")
    df[column] = encoded.astype("int64")

df.head()

Unnamed: 0,sentence,label,SwissFinBERT_label,GermanFinBERT_label,GermanSentimentBERT_label,GPT3_5_label,Gemini_label
0,``Die rasch steigenden Kosten und die Stärkung...,0,0,0,1,0,0
1,Die finnische nationale Fluggesellschaft gab a...,0,0,0,1,0,1
2,"Der Betriebsgewinn fiel von EUR 7,9 Mio. im zw...",0,0,0,1,0,0
3,"Der Gewinn für den Berichtszeitraum betrug 10,...",0,0,0,1,0,0
4,Raute meldete für das erste Halbjahr 2009 eine...,0,0,0,1,0,0


In [14]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


def _score_against_gold(prediction_column):
    """Return (accuracy, F1, precision, recall) of a prediction column versus the dataset's `label` column.

    F1, precision and recall use the "weighted" average over the classes.
    """
    reference = df["label"]
    predicted = df[prediction_column]
    return (
        accuracy_score(reference, predicted),
        f1_score(reference, predicted, average="weighted"),
        precision_score(reference, predicted, average="weighted"),
        recall_score(reference, predicted, average="weighted"),
    )


# Scores per model; the individual names are kept because later cells read them.
(swiss_fin_bert_accuracy, swiss_fin_bert_f1,
 swiss_fin_bert_precision, swiss_fin_bert_recall) = _score_against_gold("SwissFinBERT_label")

(german_fin_bert_accuracy, german_fin_bert_f1,
 german_fin_bert_precision, german_fin_bert_recall) = _score_against_gold("GermanFinBERT_label")

(german_sentiment_bert_accuracy, german_sentiment_bert_f1,
 german_sentiment_bert_precision, german_sentiment_bert_recall) = _score_against_gold("GermanSentimentBERT_label")

(gemini_accuracy, gemini_f1,
 gemini_precision, gemini_recall) = _score_against_gold("Gemini_label")

(gpt3_5_accuracy, gpt3_5_f1,
 gpt3_5_precision, gpt3_5_recall) = _score_against_gold("GPT3_5_label")

# Print the four scores of every model, one line per score.
score_report = (
    ("SwissFinBERT", (swiss_fin_bert_accuracy, swiss_fin_bert_f1,
                      swiss_fin_bert_precision, swiss_fin_bert_recall)),
    ("GermanFinBERT", (german_fin_bert_accuracy, german_fin_bert_f1,
                       german_fin_bert_precision, german_fin_bert_recall)),
    ("GermanSentimentBERT", (german_sentiment_bert_accuracy, german_sentiment_bert_f1,
                             german_sentiment_bert_precision, german_sentiment_bert_recall)),
    ("Gemini", (gemini_accuracy, gemini_f1, gemini_precision, gemini_recall)),
    ("GPT-3.5", (gpt3_5_accuracy, gpt3_5_f1, gpt3_5_precision, gpt3_5_recall)),
)
for model_name, scores in score_report:
    for metric_name, value in zip(("Accuracy", "F1 Score", "Precision", "Recall"), scores):
        print(f"{model_name} {metric_name}:", value)


SwissFinBERT Accuracy: 0.93454966695627
SwissFinBERT F1 Score: 0.9358803821881395
SwissFinBERT Precision: 0.9439808292759859
SwissFinBERT Recall: 0.93454966695627
GermanFinBERT Accuracy: 0.9875470605270779
GermanFinBERT F1 Score: 0.9875403899990871
GermanFinBERT Precision: 0.9875562340799042
GermanFinBERT Recall: 0.9875470605270779
GermanSentimentBERT Accuracy: 0.6162757022878657
GermanSentimentBERT F1 Score: 0.4810987145609384
GermanSentimentBERT Precision: 0.5351492442435485
GermanSentimentBERT Recall: 0.6162757022878657
Gemini Accuracy: 0.7242977121343759
Gemini F1 Score: 0.6645724141291002
Gemini Precision: 0.8039742865266588
Gemini Recall: 0.7242977121343759
GPT-3.5 Accuracy: 0.8447726614538082
GPT-3.5 F1 Score: 0.8451061797842169
GPT-3.5 Precision: 0.8519160538376228
GPT-3.5 Recall: 0.8447726614538082


In [16]:
# Collect the scores into one table: a column per model, a row per metric.
metric_names = ["Accuracy", "F1 Score", "Precision", "Recall"]
figures2_df = pd.DataFrame(
    {
        "SwissFinBERT": [swiss_fin_bert_accuracy, swiss_fin_bert_f1,
                         swiss_fin_bert_precision, swiss_fin_bert_recall],
        "GermanFinBERT": [german_fin_bert_accuracy, german_fin_bert_f1,
                          german_fin_bert_precision, german_fin_bert_recall],
        "GermanSentimentBERT": [german_sentiment_bert_accuracy, german_sentiment_bert_f1,
                                german_sentiment_bert_precision, german_sentiment_bert_recall],
        "Gemini": [gemini_accuracy, gemini_f1, gemini_precision, gemini_recall],
        "GPT-3.5": [gpt3_5_accuracy, gpt3_5_f1, gpt3_5_precision, gpt3_5_recall],
    },
    index=metric_names,
)
figures2_df

Unnamed: 0,SwissFinBERT,GermanFinBERT,GermanSentimentBERT,Gemini,GPT-3.5
Accuracy,0.93455,0.987547,0.616276,0.724298,0.844773
F1 Score,0.93588,0.98754,0.481099,0.664572,0.845106
Precision,0.943981,0.987556,0.535149,0.803974,0.851916
Recall,0.93455,0.987547,0.616276,0.724298,0.844773


In [17]:
# The metric names ("Accuracy", "F1 Score", ...) live in the index, so the index must
# be written; with index=False the sheet held four unlabelled rows of numbers.
figures2_df.to_excel("model_performance_figures2_v3.xlsx", index=True, index_label="Metric")