In [None]:
pip install transformers torch pandas 

Note: you may need to restart the kernel to use updated packages.


In [7]:
import re

def preprocess_text(text):
    """Normalize a raw tweet into lowercase plain text for sentiment scoring.

    Steps, in order: lowercase; strip URLs, @mentions and #hashtags; turn
    newlines/tabs into spaces; drop every character other than ASCII letters,
    digits, ``$``, ``%``, spaces and decimal points that sit between two
    digits; collapse runs of whitespace.

    Args:
        text: The raw text. ``None`` and float NaN (the value pandas uses for
            an empty cell) are treated as empty text; any other non-string
            value is converted with ``str()``.

    Returns:
        The cleaned string, which may be empty.
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ''
        text = str(text)

    # Lowercase everything (note: this also lowercases ticker symbols)
    text = text.lower()

    # Remove URLs
    text = re.sub(r'http\S+', '', text)

    # Remove @mentions
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)

    # Remove #hashtags
    text = re.sub(r'#[A-Za-z0-9_]+', '', text)

    # Replace newlines/tabs with spaces
    text = re.sub(r'[\n\t\r]', ' ', text)

    # Remove special characters, keeping $ and % for prices/percentages.
    # A '.' survives only when it has a digit on both sides, so "3.5%" is not
    # turned into "35%"; sentence-ending dots are still removed.
    text = re.sub(r'(?<![0-9])\.|\.(?![0-9])|[^a-zA-Z0-9$%. ]', '', text)

    # Collapse repeated whitespace and trim the ends
    text = re.sub(r'\s+', ' ', text).strip()

    return text

In [None]:
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Load the crawled tweets
df = pd.read_csv('twitter_crawl_result.csv')

# Apply preprocessing. Empty 'content' cells are read back as NaN (a float),
# so they are replaced with '' before the string-based cleaning runs.
df['cleaned_content'] = df['content'].fillna('').astype(str).apply(preprocess_text)

# Load the FinBERT model
model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, framework="pt")

# Run sentiment analysis on the preprocessed text.
# The whole column goes through the pipeline in one batched call instead of one
# call per row, and truncation=True clips over-long texts to the model's
# maximum sequence length instead of letting the forward pass fail.
BATCH_SIZE = 32
texts = df['cleaned_content'].tolist()
predictions = classifier(texts, truncation=True, batch_size=BATCH_SIZE) if texts else []
df['sentiment'] = [prediction['label'].lower() for prediction in predictions]

# Inspect the result
print(df[['timestamp', 'author', 'content', 'cleaned_content', 'sentiment']])

# Save the result
df.to_csv('twitter_sentiment_result.csv', index=False)

Device set to use mps:0


                     timestamp              author  \
0    2025-03-05 23:40:26+00:00           Bpay News   
1    2025-03-05 23:22:33+00:00  Bull & Bear Report   
2    2025-03-05 21:38:48+00:00     Sante Law Group   
3    2025-03-05 21:11:17+00:00    Doge Terminal AI   
4    2025-03-05 20:20:33+00:00             TknWire   
..                         ...                 ...   
787  2025-02-06 08:10:00+00:00   ChainGPT AI Agent   
788  2025-02-06 07:38:48+00:00        NFTsKing.eth   
789  2025-02-06 07:16:10+00:00         CryptoAlpha   
790  2025-02-06 06:37:04+00:00              yex.io   
791  2025-02-06 06:31:48+00:00     autonome intern   

                                               content  \
0    The SEC Crypto Task Force has met with represe...   
1    The US SEC and CFTC are joining forces to tack...   
2     Crypto & Government: Trump's proposal for a U...   
3    Trump's Strategic Crypto Reserve proposal aims...   
4    14 /\n\n- \n@CFTC\n   (3)* - Announces joint c...   
.. 

In [None]:
from pathlib import Path

import pandas as pd
import plotly.express as px

# Load the sentiment analysis results
df = pd.read_csv('twitter_sentiment_result.csv')

# Derive a calendar-date column from the timestamp
df['date'] = pd.to_datetime(df['timestamp']).dt.date

# Map each sentiment label to a numeric score
sentiment_score_map = {
    'positive': 1,
    'neutral': 0,
    'negative': -1
}
df['sentiment_score'] = df['sentiment'].map(sentiment_score_map)

# Average sentiment score per day
daily_sentiment = df.groupby('date')['sentiment_score'].mean().reset_index()

# Line chart with plotly
fig = px.line(daily_sentiment, x='date', y='sentiment_score', 
              title='📈 Daily Average Sentiment Score', 
              markers=True, template='plotly_dark')

fig.update_traces(line=dict(width=3))
fig.update_layout(xaxis_title='Date', yaxis_title='Average Sentiment Score', font=dict(size=14))

fig.show()

# Save the chart as an image. The output folder is created first so the
# export does not fail on a fresh checkout where it is missing.
Path("resources").mkdir(parents=True, exist_ok=True)
fig.write_image("resources/daily_average_sentiment_score.png")

In [None]:
# Count tweets per day and sentiment label.
# Uses `df` (including its 'date' column) as left by the previous cell.
daily_counts = df.groupby(['date', 'sentiment']).size().reset_index(name='count')

# Stacked bar chart with plotly
fig = px.bar(daily_counts, x='date', y='count', color='sentiment',
             title='📊 Daily Sentiment Distribution', 
             template='plotly_dark', barmode='stack',
             category_orders={"sentiment": ["negative", "neutral", "positive"]})

fig.update_layout(xaxis_title='Date', yaxis_title='Count', font=dict(size=14))

fig.show()

# Save the chart as an image. The output folder is created first so the
# export does not fail on a fresh checkout where it is missing.
Path("resources").mkdir(parents=True, exist_ok=True)
fig.write_image("resources/daily_sentiment_distribution.png")