In [None]:
import pandas as pd
from transformers import pipeline
from datetime import datetime

# Load the pickled frame and convert its `date` column to datetimes in one
# chained step.
# NOTE(review): unpickling can execute arbitrary code — only load this file if
# its origin is trusted.
df = pd.read_pickle('/content/test_label.pkl').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [None]:
# Preview the loaded frame (rich display of the cell's last expression).
df

Unnamed: 0,date,text,label
0,2025-04-30,deepseek releases new math ai model risk discl...,0
1,2025-04-30,adv micro device receives investment bank anal...,0
2,2025-04-30,nvidia corp receives investment bank analyst r...,0
3,2025-04-30,taiwans ase evaluating how it will support nvi...,0
4,2025-04-30,super micro slumps on forecast cut analysts do...,0
...,...,...,...
5118,2023-11-01,stock market today dow ends higher as treasury...,0
5119,2023-11-01,fed decision looms futures slip amd reports in...,0
5120,2023-11-01,megacap firm valuations fall amid rising rates...,0
5121,2023-11-01,us stocks rally as fed holds rates prompts hop...,0


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline


def _load_sentiment_pipeline(model_id):
    """Load one Hugging Face checkpoint as a text-classification pipeline.

    Args:
        model_id: Hub repository id, passed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, raw_model, pipeline)`` tuple. The pipeline wraps the
        raw model and tokenizer and is created with ``truncation=True``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    raw_model = AutoModelForSequenceClassification.from_pretrained(model_id)
    classifier = TextClassificationPipeline(model=raw_model, tokenizer=tokenizer, truncation=True)
    return tokenizer, raw_model, classifier


# The same load sequence is used for every checkpoint; only the repository id
# differs. All original module-level names are kept so later cells still work.

# Load model 1
tokenizer1, model1_raw, model1 = _load_sentiment_pipeline("ProsusAI/finbert")

# Load model 2
tokenizer2, model2_raw, model2 = _load_sentiment_pipeline("Photchara/stock_sentiment_Finbert_label")

# Load model 3
tokenizer3, model3_raw, model3 = _load_sentiment_pipeline("Photchara/stock_sentiment_Finbert_v2")

# Load model 4
tokenizer4, model4_raw, model4 = _load_sentiment_pipeline("Photchara/stock_sentiment_pct_change_v1")

# Load model 5
tokenizer5, model5_raw, model5 = _load_sentiment_pipeline("Photchara/stock_sentiment_pct_change_v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0


In [None]:
def predict_sentiment(text):
    """Classify ``text`` with all five pipelines and return their labels.

    Args:
        text: Input passed unchanged to ``model1`` ... ``model5``.

    Returns:
        A 5-tuple of lower-cased labels (the ``'label'`` field of each
        pipeline's first result), in model order. If any step raises an
        ``Exception``, the failure is logged and
        ``(None, None, None, None, None)`` is returned instead.
    """
    import logging  # local import keeps this cell runnable on its own

    try:
        # Resolve the pipelines inside the try so a missing model is reported
        # the same way as a failed prediction.
        classifiers = (model1, model2, model3, model4, model5)
        return tuple(clf(text)[0]['label'].lower() for clf in classifiers)
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit can still stop a long run, and log the
        # cause instead of silently producing a row of None values.
        logging.getLogger(__name__).warning(
            "predict_sentiment failed for %.80r: %r", text, exc
        )
        return None, None, None, None, None

In [None]:
# Run every text through the five classifiers, then expand the resulting
# 5-tuples into columns with a single DataFrame construction. This avoids
# allocating one pd.Series per row inside `apply`.
_prediction_columns = ['label1', 'label2', 'label3', 'label4', 'label5']
df[_prediction_columns] = pd.DataFrame(
    df['text'].map(predict_sentiment).tolist(),
    index=df.index,  # keep row alignment with df
    columns=_prediction_columns,
)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [None]:
# Inspect the frame after the per-model label columns were added.
# NOTE(review): the saved output already lists label1_num..label3_num, which are
# only created in a later cell — this looks like stale / out-of-order output;
# confirm with Restart & Run All.
df

Unnamed: 0,date,text,label,label1,label2,label3,label4,label5,label1_num,label2_num,label3_num
0,2025-04-30,deepseek releases new math ai model risk discl...,0,neutral,neutral,neutral,positive,positive,0,0,0
1,2025-04-30,adv micro device receives investment bank anal...,0,neutral,neutral,neutral,positive,positive,0,0,0
2,2025-04-30,nvidia corp receives investment bank analyst r...,0,neutral,neutral,neutral,negative,negative,0,0,0
3,2025-04-30,taiwans ase evaluating how it will support nvi...,0,neutral,neutral,neutral,positive,positive,0,0,0
4,2025-04-30,super micro slumps on forecast cut analysts do...,0,negative,negative,negative,negative,positive,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...
5118,2023-11-01,stock market today dow ends higher as treasury...,0,positive,positive,positive,positive,negative,1,1,1
5119,2023-11-01,fed decision looms futures slip amd reports in...,0,negative,negative,negative,negative,positive,-1,-1,-1
5120,2023-11-01,megacap firm valuations fall amid rising rates...,0,negative,negative,negative,positive,positive,-1,-1,-1
5121,2023-11-01,us stocks rally as fed holds rates prompts hop...,0,negative,negative,negative,positive,negative,-1,-1,-1


In [None]:
# Signed numeric score for each sentiment class name.
label_mapping = dict(negative=-1, positive=1, neutral=0)

In [None]:
# Add a numeric `<column>_num` companion for each model's label column.
# Labels that are missing from `label_mapping` (including None) become NaN.
for _label_col in ('label1', 'label2', 'label3', 'label4', 'label5'):
    df[f'{_label_col}_num'] = df[_label_col].map(label_mapping)

In [None]:
# Inspect the frame with the numeric label columns added.
df

Unnamed: 0,date,text,label,label1,label2,label3,label4,label5,label1_num,label2_num,label3_num,label4_num,label5_num
0,2025-04-30,deepseek releases new math ai model risk discl...,0,neutral,neutral,neutral,positive,positive,0,0,0,1,1
1,2025-04-30,adv micro device receives investment bank anal...,0,neutral,neutral,neutral,positive,positive,0,0,0,1,1
2,2025-04-30,nvidia corp receives investment bank analyst r...,0,neutral,neutral,neutral,negative,negative,0,0,0,-1,-1
3,2025-04-30,taiwans ase evaluating how it will support nvi...,0,neutral,neutral,neutral,positive,positive,0,0,0,1,1
4,2025-04-30,super micro slumps on forecast cut analysts do...,0,negative,negative,negative,negative,positive,-1,-1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5118,2023-11-01,stock market today dow ends higher as treasury...,0,positive,positive,positive,positive,negative,1,1,1,1,-1
5119,2023-11-01,fed decision looms futures slip amd reports in...,0,negative,negative,negative,negative,positive,-1,-1,-1,-1,1
5120,2023-11-01,megacap firm valuations fall amid rising rates...,0,negative,negative,negative,positive,positive,-1,-1,-1,1,1
5121,2023-11-01,us stocks rally as fed holds rates prompts hop...,0,negative,negative,negative,positive,negative,-1,-1,-1,1,-1


In [None]:
# Spot-check all rows for a single date.
df[df['date'] == '2024-11-11']

Unnamed: 0,date,text,label,label1,label2,label3,label4,label5,label1_num,label2_num,label3_num,label4_num,label5_num
2008,2024-11-11,monolithic power systems receives investment b...,0,neutral,neutral,neutral,positive,positive,0,0,0,1,1
2009,2024-11-11,marvell receives investment bank analyst ratin...,0,neutral,neutral,neutral,negative,negative,0,0,0,-1,-1
2010,2024-11-11,nvidia price target raised to 165 by mizuho by...,0,positive,positive,positive,neutral,positive,1,1,1,0,1
2011,2024-11-11,monolithic power systems receives investment b...,0,neutral,neutral,neutral,positive,positive,0,0,0,1,1
2012,2024-11-11,nvidia pt receives investment bank analyst rat...,0,neutral,neutral,neutral,negative,negative,0,0,0,-1,-1
2013,2024-11-11,ubs raises nvidia target to 185 maintains buy ...,0,positive,positive,positive,positive,positive,1,1,1,1,1
2014,2024-11-11,morgan stanley bumps nvidia shares target on m...,0,positive,positive,positive,negative,positive,1,1,1,-1,1
2015,2024-11-11,nvidia pt receives investment bank analyst rat...,0,neutral,neutral,neutral,negative,negative,0,0,0,-1,-1
2016,2024-11-11,nvidia falls to low down 27 by investingcom ri...,0,neutral,neutral,neutral,negative,negative,0,0,0,-1,-1
2017,2024-11-11,morgan stanley lifts nvidia price target estim...,0,positive,positive,positive,positive,negative,1,1,1,1,-1


In [None]:
# Per-date mean of each model's numeric label (one output row per date).
_score_columns = [f'label{_n}_num' for _n in range(1, 6)]
avg_df = df.groupby('date', as_index=False)[_score_columns].mean()
# Alternative kept for reference: aggregate with the per-date mode instead.
# avg_df = df.groupby('date', as_index=False)[['label1_num', 'label2_num','label3_num']].agg(lambda x: x.mode()[0])

In [None]:
# Summary statistics of the per-date mean scores.
avg_df.describe()

Unnamed: 0,date,label1_num,label2_num,label3_num,label4_num,label5_num
count,372,372.0,372.0,372.0,372.0,372.0
mean,2024-08-01 07:48:23.225806592,-0.006079,-0.011037,-0.009238,0.222751,0.385934
min,2023-11-01 00:00:00,-1.0,-1.0,-1.0,-1.0,-1.0
25%,2024-03-19 18:00:00,-0.202083,-0.222222,-0.227047,0.0,0.16859
50%,2024-08-01 12:00:00,0.0,0.0,0.0,0.213203,0.366029
75%,2024-12-12 06:00:00,0.1875,0.2,0.2,0.454545,0.627841
max,2025-04-30 00:00:00,1.0,1.0,1.0,1.0,1.0
std,,0.334066,0.344237,0.346315,0.362979,0.344588


In [None]:
def score_to_sentiment(score, threshold=0.1):
    """Bucket a numeric sentiment score into a class name.

    Args:
        score: Numeric score to classify.
        threshold: Width of the neutral band around zero. Defaults to 0.1,
            the cut-off this notebook originally hard-coded.

    Returns:
        ``'positive'`` if ``score > threshold``, ``'negative'`` if
        ``score < -threshold``, otherwise ``'neutral'``. Both bounds are
        exclusive, so a score exactly at ``±threshold`` is neutral. A NaN
        score fails both comparisons and therefore also yields ``'neutral'``.
    """
    if score > threshold:
        return 'positive'
    if score < -threshold:
        return 'negative'
    return 'neutral'

In [None]:
# Bucket each model's per-date mean score into a sentiment class name.
for _model_no in range(1, 6):
    avg_df[f'sentiment{_model_no}'] = avg_df[f'label{_model_no}_num'].apply(score_to_sentiment)

In [None]:
# sentiment_map = {1:'positive', 0:'neutral', -1:'negative'}
# avg_df['label1_num'] = avg_df['label1_num'].map(sentiment_map)
# avg_df['label2_num'] = avg_df['label2_num'].map(sentiment_map)

In [None]:
# Inspect the per-date scores together with their sentiment buckets.
avg_df

Unnamed: 0,date,label1_num,label2_num,label3_num,label4_num,label5_num,sentiment1,sentiment2,sentiment3,sentiment4,sentiment5
0,2023-11-01,-0.555556,-0.555556,-0.333333,0.222222,0.222222,negative,negative,negative,positive,positive
1,2023-11-02,0.400000,0.400000,0.400000,0.200000,-0.200000,positive,positive,positive,positive,negative
2,2023-11-03,-0.500000,-0.500000,-0.500000,0.500000,0.500000,negative,negative,negative,positive,positive
3,2023-11-06,-0.166667,-0.166667,-0.166667,0.000000,0.000000,negative,negative,negative,neutral,neutral
4,2023-11-07,0.250000,0.375000,0.375000,0.500000,1.000000,positive,positive,positive,positive,positive
...,...,...,...,...,...,...,...,...,...,...,...
367,2025-04-24,0.000000,-0.066667,0.000000,0.333333,0.333333,neutral,neutral,neutral,positive,positive
368,2025-04-25,-0.333333,-0.333333,-0.333333,0.666667,0.666667,negative,negative,negative,positive,positive
369,2025-04-28,-0.461538,-0.461538,-0.461538,0.538462,0.846154,negative,negative,negative,positive,positive
370,2025-04-29,-0.105263,-0.157895,-0.157895,0.157895,0.263158,negative,negative,negative,positive,positive


In [None]:
# Count how many dates fall into each combination of sentiment2, sentiment4
# and sentiment5.
_combo_keys = ['sentiment2', 'sentiment4', 'sentiment5']
sentiment_combo_counts = (
    avg_df
    .groupby(_combo_keys)
    .size()
    .reset_index(name='count')
)

# Alternative: use value_counts over the column tuple.
# sentiment_combo_counts = df[['sentiment1', 'sentiment2']].value_counts().reset_index(name='count')

print(sentiment_combo_counts)


   sentiment2 sentiment4 sentiment5  count
0    negative   negative   negative     11
1    negative   negative    neutral      4
2    negative   negative   positive     27
3    negative    neutral   negative      1
4    negative    neutral    neutral      9
5    negative    neutral   positive     24
6    negative   positive   negative      2
7    negative   positive    neutral      6
8    negative   positive   positive     63
9     neutral   negative   negative      2
10    neutral   negative   positive      5
11    neutral    neutral   negative      1
12    neutral    neutral    neutral      4
13    neutral    neutral   positive     10
14    neutral   positive   negative      2
15    neutral   positive    neutral      5
16    neutral   positive   positive     51
17   positive   negative   negative      3
18   positive   negative    neutral      3
19   positive   negative   positive      7
20   positive    neutral   negative      1
21   positive    neutral    neutral      5
22   positi

In [None]:
# Replace the generic sentimentN column names with descriptive model names.
# avg_df is rebound here, so cells that run after this one only see the new
# column names.
_model_column_names = {
    'sentiment1': 'finBert',
    'sentiment2': 'finBert_finbert_label',
    'sentiment3': 'GGBert_finbert_label',
    'sentiment4': 'finBert_return_next_label',
    'sentiment5': 'GGBert_return_next_label',
}
avg_df = avg_df.rename(columns=_model_column_names)
avg_df

Unnamed: 0,date,label1_num,label2_num,label3_num,label4_num,label5_num,finBert,finBert_finbert_label,GGBert_finbert_label,finBert_return_next_label,GGBert_return_next_label
0,2023-11-01,-0.555556,-0.555556,-0.333333,0.222222,0.222222,negative,negative,negative,positive,positive
1,2023-11-02,0.400000,0.400000,0.400000,0.200000,-0.200000,positive,positive,positive,positive,negative
2,2023-11-03,-0.500000,-0.500000,-0.500000,0.500000,0.500000,negative,negative,negative,positive,positive
3,2023-11-06,-0.166667,-0.166667,-0.166667,0.000000,0.000000,negative,negative,negative,neutral,neutral
4,2023-11-07,0.250000,0.375000,0.375000,0.500000,1.000000,positive,positive,positive,positive,positive
...,...,...,...,...,...,...,...,...,...,...,...
367,2025-04-24,0.000000,-0.066667,0.000000,0.333333,0.333333,neutral,neutral,neutral,positive,positive
368,2025-04-25,-0.333333,-0.333333,-0.333333,0.666667,0.666667,negative,negative,negative,positive,positive
369,2025-04-28,-0.461538,-0.461538,-0.461538,0.538462,0.846154,negative,negative,negative,positive,positive
370,2025-04-29,-0.105263,-0.157895,-0.157895,0.157895,0.263158,negative,negative,negative,positive,positive


In [None]:
# Export the date plus the five per-model sentiment columns (no index column).
avg_df.to_csv(
    'sentiments_final.csv',
    columns=[
        'date',
        'finBert',
        'finBert_finbert_label',
        'GGBert_finbert_label',
        'finBert_return_next_label',
        'GGBert_return_next_label',
    ],
    index=False,
)

In [None]:
def _most_frequent_label(ids):
    """Return the most frequent value in ``ids`` (smallest on ties), or NaN if there is none."""
    modes = ids.mode()
    return modes.iloc[0] if not modes.empty else float('nan')


# Collapse the per-row ground-truth class ids to one id per date.
# The ids are categorical, so take the most frequent id rather than the
# arithmetic mean. Averaging different ids yields either a fractional value
# that an id -> name mapping cannot match, or an unrelated class id
# (e.g. mean(0, 2) == 1). When every row of a date carries the same id, the
# result is that id, exactly as before.
test_ans = df.groupby('date')['label'].agg(_most_frequent_label).reset_index()

In [None]:
# Translate ground-truth class ids into class names. Values that are not a key
# of the mapping become NaN, so running this cell a second time (when the
# column already holds names) would blank the column.
_id_to_sentiment = {0: 'positive', 1: 'negative', 2: 'neutral'}
test_ans['label'] = test_ans['label'].map(_id_to_sentiment)

In [None]:
# Inspect the per-date ground-truth labels.
test_ans

Unnamed: 0,date,label
0,2023-11-01,positive
1,2023-11-02,positive
2,2023-11-03,positive
3,2023-11-06,neutral
4,2023-11-07,positive
...,...,...
367,2025-04-24,positive
368,2025-04-25,negative
369,2025-04-28,neutral
370,2025-04-29,neutral


In [None]:
from sklearn.metrics import accuracy_score

# Join the per-date predictions with the per-date ground truth on `date`.
df_merged = avg_df.merge(test_ans, on='date')

# Measure each model's accuracy against the ground-truth label.
_truth = df_merged['label']
acc1 = accuracy_score(_truth, df_merged['finBert_finbert_label'])
acc2 = accuracy_score(_truth, df_merged['finBert_return_next_label'])
acc3 = accuracy_score(_truth, df_merged['GGBert_return_next_label'])
for _report_line in (
    f"Accuracy ของ model 1: {acc1:.6f}",
    f"Accuracy ของ model 2: {acc2:.6f}",
    f"Accuracy ของ model 3: {acc3:.6f}",
):
    print(_report_line)

Accuracy ของ model 1: 0.362903
Accuracy ของ model 2: 0.419355
Accuracy ของ model 3: 0.451613


In [None]:
# Dates on which two of the models disagree.
# avg_df's sentimentN columns were renamed before df_merged was built, so
# `sentiment1` / `sentiment3` no longer exist here and indexing them raises
# KeyError on a fresh run. The saved output was produced by an earlier
# three-model version of this notebook; its sentiment1 / sentiment3 values
# match today's finBert_finbert_label / GGBert_return_next_label (the columns
# scored as model 1 and model 3 in the accuracy cell).
# TODO(review): confirm this is the intended pair.
df_merged[df_merged['finBert_finbert_label'] != df_merged['GGBert_return_next_label']]

Unnamed: 0,date,label1_num,label2_num,label3_num,sentiment1,sentiment2,sentiment3,label
0,2023-11-01,-0.555556,0.222222,0.222222,negative,positive,positive,positive
1,2023-11-02,0.400000,0.200000,-0.200000,positive,positive,negative,positive
2,2023-11-03,-0.500000,0.500000,0.500000,negative,positive,positive,positive
3,2023-11-06,-0.166667,0.000000,0.000000,negative,neutral,neutral,neutral
5,2023-11-08,0.000000,0.000000,-1.000000,neutral,neutral,negative,positive
...,...,...,...,...,...,...,...,...
367,2025-04-24,-0.066667,0.333333,0.333333,neutral,positive,positive,positive
368,2025-04-25,-0.333333,0.666667,0.666667,negative,positive,positive,negative
369,2025-04-28,-0.461538,0.538462,0.846154,negative,positive,positive,neutral
370,2025-04-29,-0.157895,0.157895,0.263158,negative,positive,positive,neutral
