In [None]:
import pandas as pd
from transformers import pipeline
from datetime import datetime

# Load the labelled headlines and parse the `date` column into datetimes in one chain.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
df = pd.read_pickle('/content/test_label.pkl').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [None]:
# Display the loaded frame (date, text, label columns) for a quick visual check.
df

Unnamed: 0,date,text,label
0,2025-04-30,deepseek releases new math ai model risk discl...,0
1,2025-04-30,adv micro device receives investment bank anal...,0
2,2025-04-30,nvidia corp receives investment bank analyst r...,0
3,2025-04-30,taiwans ase evaluating how it will support nvi...,0
4,2025-04-30,super micro slumps on forecast cut analysts do...,0
...,...,...,...
5118,2023-11-01,stock market today dow ends higher as treasury...,0
5119,2023-11-01,fed decision looms futures slip amd reports in...,0
5120,2023-11-01,megacap firm valuations fall amid rising rates...,0
5121,2023-11-01,us stocks rally as fed holds rates prompts hop...,0


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline

def _build_classifier(repo_id):
    """Load the tokenizer and sequence-classification model for ``repo_id``.

    Returns a ``(tokenizer, raw_model, pipeline)`` triple; the pipeline is a
    ``TextClassificationPipeline`` created with ``truncation=True``.
    """
    tok = AutoTokenizer.from_pretrained(repo_id)
    net = AutoModelForSequenceClassification.from_pretrained(repo_id)
    return tok, net, TextClassificationPipeline(model=net, tokenizer=tok, truncation=True)


# Model 1
tokenizer1, model1_raw, model1 = _build_classifier("Photchara/stock_sentiment_Finbert_label")

# Model 2
tokenizer2, model2_raw, model2 = _build_classifier("Photchara/stock_sentiment_pct_change_v1")

# Model 3
tokenizer3, model3_raw, model3 = _build_classifier("ProsusAI/finbert")

Device set to use cuda:0
Device set to use cuda:0


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
def predict_sentiment(text, models=None):
    """Classify ``text`` with each sentiment model and return the lowercased labels.

    Parameters
    ----------
    text : str
        Input handed unchanged to every model.
    models : iterable of callables, optional
        Classifiers to run, in order. Each is called as ``model(text)`` and must
        return a sequence whose first element is a mapping with a ``'label'`` key.
        Defaults to the notebook-level ``(model1, model2, model3)``.

    Returns
    -------
    tuple
        One lowercased label per model, in order. If any model call fails, a
        tuple of ``None`` values of the same length is returned instead
        (best-effort), and a warning describing the failure is logged.
    """
    import logging

    if models is None:
        # Resolved at call time so the notebook-level pipelines are picked up.
        models = (model1, model2, model3)
    models = tuple(models)

    try:
        return tuple(model(text)[0]['label'].lower() for model in models)
    except Exception as exc:
        # `Exception`, not a bare `except:`, so KeyboardInterrupt / SystemExit
        # still stop a long `.apply` run; the failure is reported, not hidden.
        logging.getLogger(__name__).warning(
            "predict_sentiment failed for text %r: %s", str(text)[:80], exc
        )
        return (None,) * len(models)

In [None]:
# Run the classifiers on every headline; each call yields a 3-tuple of labels,
# which is expanded into one column per model before being written back to df.
per_model_labels = df['text'].apply(lambda headline: pd.Series(predict_sentiment(headline)))
df[['label1', 'label2', 'label3']] = per_model_labels

In [None]:
# Numeric encoding of the text sentiment labels.
label_mapping = dict([
    ('negative', -1),
    ('positive', 1),
    ('neutral', 0),
])

In [None]:
# Encode each model's text label as -1 / 0 / 1 via label_mapping.
for text_col, numeric_col in (
    ('label1', 'label1_num'),
    ('label2', 'label2_num'),
    ('label3', 'label3_num'),
):
    df[numeric_col] = df[text_col].map(label_mapping)

In [None]:
# Display df with the per-model text labels and their numeric encodings added.
df

Unnamed: 0,date,text,label,label1,label2,label1_num,label2_num,label3,label3_num
0,2025-04-30,deepseek releases new math ai model risk discl...,0,neutral,positive,0,1,neutral,0
1,2025-04-30,adv micro device receives investment bank anal...,0,neutral,positive,0,1,neutral,0
2,2025-04-30,nvidia corp receives investment bank analyst r...,0,neutral,negative,0,-1,neutral,0
3,2025-04-30,taiwans ase evaluating how it will support nvi...,0,neutral,positive,0,1,neutral,0
4,2025-04-30,super micro slumps on forecast cut analysts do...,0,negative,negative,-1,-1,negative,-1
...,...,...,...,...,...,...,...,...,...
5118,2023-11-01,stock market today dow ends higher as treasury...,0,positive,positive,1,1,positive,1
5119,2023-11-01,fed decision looms futures slip amd reports in...,0,negative,negative,-1,-1,negative,-1
5120,2023-11-01,megacap firm valuations fall amid rising rates...,0,negative,positive,-1,1,negative,-1
5121,2023-11-01,us stocks rally as fed holds rates prompts hop...,0,negative,positive,-1,1,negative,-1


In [None]:
# Spot-check the rows (headlines and per-model predictions) for a single date.
df.loc[df['date'].eq('2024-11-11')]

Unnamed: 0,date,text,label,label1,label2,label1_num,label2_num,label3,label3_num
2008,2024-11-11,monolithic power systems receives investment b...,0,neutral,positive,0,1,neutral,0
2009,2024-11-11,marvell receives investment bank analyst ratin...,0,neutral,negative,0,-1,neutral,0
2010,2024-11-11,nvidia price target raised to 165 by mizuho by...,0,positive,neutral,1,0,positive,1
2011,2024-11-11,monolithic power systems receives investment b...,0,neutral,positive,0,1,neutral,0
2012,2024-11-11,nvidia pt receives investment bank analyst rat...,0,neutral,negative,0,-1,neutral,0
2013,2024-11-11,ubs raises nvidia target to 185 maintains buy ...,0,positive,positive,1,1,positive,1
2014,2024-11-11,morgan stanley bumps nvidia shares target on m...,0,positive,negative,1,-1,positive,1
2015,2024-11-11,nvidia pt receives investment bank analyst rat...,0,neutral,negative,0,-1,neutral,0
2016,2024-11-11,nvidia falls to low down 27 by investingcom ri...,0,neutral,negative,0,-1,neutral,0
2017,2024-11-11,morgan stanley lifts nvidia price target estim...,0,positive,positive,1,1,positive,1


In [None]:
# Daily score per model: mean of the numeric labels over each date's rows.
by_date = df.groupby('date', as_index=False)
avg_df = by_date[['label1_num', 'label2_num', 'label3_num']].mean()
# Alternative (disabled): take the most frequent label per date instead of the mean.
# avg_df = df.groupby('date', as_index=False)[['label1_num', 'label2_num','label3_num']].agg(lambda x: x.mode()[0])

In [None]:
# Display the per-date mean scores for the three models.
avg_df

Unnamed: 0,date,label1_num,label2_num,label3_num
0,2023-11-01,-0.555556,0.222222,-0.555556
1,2023-11-02,0.400000,0.200000,0.400000
2,2023-11-03,-0.500000,0.500000,-0.500000
3,2023-11-06,-0.166667,0.000000,-0.166667
4,2023-11-07,0.375000,0.500000,0.250000
...,...,...,...,...
367,2025-04-24,-0.066667,0.333333,0.000000
368,2025-04-25,-0.333333,0.666667,-0.333333
369,2025-04-28,-0.461538,0.538462,-0.461538
370,2025-04-29,-0.157895,0.157895,-0.105263


In [None]:
def score_to_sentiment(score, threshold=0.1):
    """Convert a numeric sentiment score into a text class.

    Parameters
    ----------
    score : float
        Score to classify.
    threshold : float, default 0.1
        Half-width of the neutral band. Scores strictly above ``threshold`` are
        ``'positive'``, strictly below ``-threshold`` are ``'negative'``, and
        everything else (including exactly ``±threshold``) is ``'neutral'``.

    Returns
    -------
    str
        ``'positive'``, ``'negative'`` or ``'neutral'``.

    Raises
    ------
    ValueError
        If ``threshold`` is negative.

    Notes
    -----
    A NaN score compares false against both bounds and is therefore reported
    as ``'neutral'``.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")
    if score > threshold:
        return 'positive'
    if score < -threshold:
        return 'negative'
    return 'neutral'

In [None]:
# Turn each model's daily mean score into a positive / negative / neutral class.
for score_col, sentiment_col in (
    ('label1_num', 'sentiment1'),
    ('label2_num', 'sentiment2'),
    ('label3_num', 'sentiment3'),
):
    avg_df[sentiment_col] = avg_df[score_col].apply(score_to_sentiment)

In [None]:
# sentiment_map = {1:'positive', 0:'neutral', -1:'negative'}
# avg_df['label1_num'] = avg_df['label1_num'].map(sentiment_map)
# avg_df['label2_num'] = avg_df['label2_num'].map(sentiment_map)

In [None]:
# Display the per-date scores together with their derived sentiment classes.
avg_df

Unnamed: 0,date,label1_num,label2_num,label3_num,sentiment1,sentiment2,sentiment3
0,2023-11-01,-0.555556,0.222222,-0.555556,negative,positive,negative
1,2023-11-02,0.400000,0.200000,0.400000,positive,positive,positive
2,2023-11-03,-0.500000,0.500000,-0.500000,negative,positive,negative
3,2023-11-06,-0.166667,0.000000,-0.166667,negative,neutral,negative
4,2023-11-07,0.375000,0.500000,0.250000,positive,positive,positive
...,...,...,...,...,...,...,...
367,2025-04-24,-0.066667,0.333333,0.000000,neutral,positive,neutral
368,2025-04-25,-0.333333,0.666667,-0.333333,negative,positive,negative
369,2025-04-28,-0.461538,0.538462,-0.461538,negative,positive,negative
370,2025-04-29,-0.157895,0.157895,-0.105263,negative,positive,negative


In [None]:
# Count how many dates fall into each (sentiment1, sentiment2, sentiment3) combination.
combo_sizes = avg_df.groupby(['sentiment1', 'sentiment2','sentiment3']).size()
sentiment_combo_counts = combo_sizes.reset_index(name='count')

# Alternative (disabled): value_counts over the sentiment columns.
# sentiment_combo_counts = df[['sentiment1', 'sentiment2']].value_counts().reset_index(name='count')

print(sentiment_combo_counts)


   sentiment1 sentiment2 sentiment3  count
0    negative   negative   negative     41
1    negative   negative    neutral      1
2    negative    neutral   negative     30
3    negative    neutral    neutral      4
4    negative   positive   negative     61
5    negative   positive    neutral      8
6    negative   positive   positive      2
7     neutral   negative    neutral      5
8     neutral   negative   positive      2
9     neutral    neutral   negative      1
10    neutral    neutral    neutral     11
11    neutral    neutral   positive      3
12    neutral   positive   negative      4
13    neutral   positive    neutral     49
14    neutral   positive   positive      5
15   positive   negative   negative      1
16   positive   negative    neutral      1
17   positive   negative   positive     11
18   positive    neutral    neutral      1
19   positive    neutral   positive     21
20   positive   positive    neutral      8
21   positive   positive   positive    102


In [None]:
# Export only the date and the three per-model sentiment classes (no index column).
export_columns = ['date', 'sentiment1', 'sentiment2' ,'sentiment3']
avg_df[export_columns].to_csv('date_sentiments.csv', index=False)

In [None]:
def _majority_label(day_labels):
    """Return the most frequent non-null label of one date's rows (smallest value on ties).

    Returns NaN when the group has no non-null label.
    """
    modes = day_labels.mode()  # sorted; NaN is dropped by default
    return modes.iloc[0] if len(modes) else float('nan')


# Collapse the per-row labels to one reference label per date.
# `label` holds class IDs, so take the most frequent ID instead of the mean:
# averaging IDs can land on a different class (IDs 0 and 2 average to 1) or on a
# non-integer value that has no class at all. When every row of a date carries
# the same ID, this yields the same class as before.
test_ans = df.groupby('date')['label'].agg(_majority_label).reset_index()

In [None]:
# Replace the numeric class IDs with their sentiment names.
# Values that are not exactly one of these keys (including already-mapped strings
# if this cell is re-run) become NaN.
label_names = {0:'positive',1:'negative',2:'neutral'}
test_ans['label'] = test_ans['label'].map(label_names)

In [None]:
# Display the per-date reference labels after mapping IDs to names.
test_ans

Unnamed: 0,date,label
0,2023-11-01,positive
1,2023-11-02,positive
2,2023-11-03,positive
3,2023-11-06,neutral
4,2023-11-07,positive
...,...,...
367,2025-04-24,positive
368,2025-04-25,negative
369,2025-04-28,neutral
370,2025-04-29,neutral


In [None]:
from sklearn.metrics import accuracy_score

# Join the daily predictions with the daily reference labels on date.
df_merged = avg_df.merge(test_ans, on='date')

# Measure each model's accuracy against the reference label.
y_true = df_merged['label']
acc1, acc2, acc3 = (
    accuracy_score(y_true, df_merged[prediction_col])
    for prediction_col in ('sentiment1', 'sentiment2', 'sentiment3')
)
print(f"Accuracy ของ model 1: {acc1:.6f}")
print(f"Accuracy ของ model 2: {acc2:.6f}")
print(f"Accuracy ของ model 3: {acc3:.6f}")

Accuracy ของ model 1: 0.362903
Accuracy ของ model 2: 0.419355
Accuracy ของ model 3: 0.362903


In [None]:
# Dates on which model 1 and model 3 arrive at different daily sentiment classes.
df_merged.loc[df_merged['sentiment1'].ne(df_merged['sentiment3'])]

Unnamed: 0,date,label1_num,label2_num,label3_num,sentiment1,sentiment2,sentiment3,label
5,2023-11-08,0.0,0.0,-0.5,neutral,neutral,negative,positive
7,2023-11-10,0.166667,0.333333,0.0,positive,positive,neutral,positive
13,2023-11-20,-0.105263,0.263158,0.0,negative,positive,neutral,negative
60,2024-02-01,0.125,0.5,0.0,positive,positive,neutral,positive
66,2024-02-09,0.111111,0.111111,0.0,positive,positive,neutral,neutral
76,2024-02-26,0.12,0.36,0.08,positive,positive,neutral,neutral
77,2024-02-27,0.25,-0.5,0.0,positive,negative,neutral,negative
86,2024-03-11,-0.3,0.5,-0.1,negative,positive,neutral,positive
96,2024-03-25,0.2,0.6,0.1,positive,positive,neutral,negative
109,2024-04-12,-0.166667,0.333333,0.0,negative,positive,neutral,negative
