<a href="https://colab.research.google.com/github/Azaidi317/LLM-Finetuning-Projects/blob/main/sentiment_analysis_of_stock_based_on_existing_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install praw

Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting prawcore<3,>=2.4 (from praw)
  Downloading prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Collecting update_checker>=0.18 (from praw)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading praw-7.8.1-py3-none-any.whl (189 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading prawcore-2.4.0-py3-none-any.whl (17 kB)
Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Installing collected packages: update_checker, prawcore, praw
Successfully installed praw-7.8.1 prawcore-2.4.0 update_checker-0.18.0


In [5]:
import praw
import pandas as pd
from datetime import datetime
from transformers import pipeline
import numpy as np
from tqdm import tqdm
import time

class StockSentimentAnalyzer:
    def __init__(self, client_id, client_secret, user_agent):
        # Initialize Reddit API
        self.reddit = praw.Reddit(
            client_id='Nr1OwEqV_a8GVY3_jxU9-w',
            client_secret='sO0micrWKauECqX7bkR4ztxtLxtsEA',
            user_agent='stock_bot/1.0 by Same_Can_7313',
            check_for_async=False
        )

        # Initialize sentiment analyzer
        # Using pre-trained financial sentiment model
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="ProsusAI/finbert"  # Specifically trained for financial text
        )

    def analyze_text_sentiment(self, text):
        """Analyze sentiment of a piece of text"""
        try:
            # Ensure text is not empty
            if not text or pd.isna(text):
                return {
                    'label': 'neutral',
                    'score': 0.0
                }

            # Analyze sentiment
            result = self.sentiment_analyzer(text[:512])[0]  # Limit text length
            return result

        except Exception as e:
            print(f"Error analyzing sentiment: {str(e)}")
            return {
                'label': 'neutral',
                'score': 0.0
            }

    def collect_and_analyze(self, subreddit_name="RIVNstock",
                          stock_symbol="RIVN", limit=50):
        """Collect posts and analyze their sentiment"""
        try:
            # Access subreddit
            subreddit = self.reddit.subreddit(subreddit_name)
            posts_data = []

            print(f"Collecting and analyzing posts about {stock_symbol} from r/{subreddit_name}...")

            # Search for posts containing the stock symbol
            for post in tqdm(subreddit.search(stock_symbol, limit=limit, sort='new')):
                # Combine title and text for sentiment analysis
                full_text = f"{post.title} {post.selftext}"
                sentiment = self.analyze_text_sentiment(full_text)

                post_data = {
                    'title': post.title,
                    'text': post.selftext,
                    'score': post.score,
                    'created_utc': datetime.fromtimestamp(post.created_utc),
                    'num_comments': post.num_comments,
                    'url': f"https://reddit.com{post.permalink}",
                    'sentiment': sentiment['label'],
                    'sentiment_score': sentiment['score']
                }
                posts_data.append(post_data)
                time.sleep(0.5)  # Add delay to avoid rate limiting

            # Convert to DataFrame
            df = pd.DataFrame(posts_data)

            # Calculate sentiment statistics
            sentiment_stats = self.calculate_sentiment_stats(df)

            # Save to CSV
            filename = f"{stock_symbol}_{subreddit_name}_sentiment.csv"
            df.to_csv(filename, index=False)
            print(f"\nData saved to {filename}")

            return df, sentiment_stats

        except Exception as e:
            print(f"Error collecting data: {str(e)}")
            return None, None

    def calculate_sentiment_stats(self, df):
        """Calculate sentiment statistics"""
        stats = {
            'total_posts': len(df),
            'sentiment_distribution': df['sentiment'].value_counts().to_dict(),
            'average_sentiment_score': df['sentiment_score'].mean(),
            'sentiment_by_day': df.groupby(df['created_utc'].dt.date)['sentiment'].value_counts().unstack().fillna(0).to_dict(),
            'high_impact_posts': df.nlargest(5, 'score')[['title', 'sentiment', 'score', 'url']].to_dict('records')
        }
        return stats

def print_sentiment_analysis(stats):
    """Print sentiment analysis in a readable format"""
    print("\nSentiment Analysis Results:")
    print("=" * 50)

    print("\nOverall Statistics:")
    print(f"Total Posts Analyzed: {stats['total_posts']}")

    print("\nSentiment Distribution:")
    for sentiment, count in stats['sentiment_distribution'].items():
        percentage = (count / stats['total_posts']) * 100
        print(f"{sentiment}: {count} posts ({percentage:.1f}%)")

    print(f"\nAverage Sentiment Score: {stats['average_sentiment_score']:.3f}")

    print("\nTop Posts by Impact:")
    for post in stats['high_impact_posts']:
        print(f"\nTitle: {post['title']}")
        print(f"Sentiment: {post['sentiment']}")
        print(f"Score: {post['score']}")
        print(f"URL: {post['url']}")

# Example usage
if __name__ == "__main__":
    # Your Reddit API credentials
    CLIENT_ID = "your_client_id"
    CLIENT_SECRET = "your_client_secret"
    USER_AGENT = "StockSentimentBot/1.0"

    # Initialize analyzer
    analyzer = StockSentimentAnalyzer(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        user_agent=USER_AGENT
    )

    # Collect and analyze posts
    df, stats = analyzer.collect_and_analyze(
        subreddit_name="RIVNstock",
        stock_symbol="RIVN",
        limit=50
    )

    if df is not None and stats is not None:
        # Print analysis
        print_sentiment_analysis(stats)

        # Example of filtering strong sentiments
        strong_sentiments = df[df['sentiment_score'] > 0.8]
        print("\nStrongly Opinionated Posts:")
        for _, post in strong_sentiments.iterrows():
            print(f"\nTitle: {post['title']}")
            print(f"Sentiment: {post['sentiment']} (Score: {post['sentiment_score']:.3f})")

Collecting and analyzing posts about RIVN from r/RIVNstock...


50it [00:39,  1.26it/s]


Data saved to RIVN_RIVNstock_sentiment.csv

Sentiment Analysis Results:

Overall Statistics:
Total Posts Analyzed: 50

Sentiment Distribution:
neutral: 39 posts (78.0%)
negative: 7 posts (14.0%)
positive: 4 posts (8.0%)

Average Sentiment Score: 0.835

Top Posts by Impact:

Title: Added 2600 RIVN Stonks at $9.59
Sentiment: neutral
Score: 80
URL: https://reddit.com/r/RIVNstock/comments/1gs50j7/added_2600_rivn_stonks_at_959/

Title: HOLD STRONG
Sentiment: negative
Score: 51
URL: https://reddit.com/r/RIVNstock/comments/1flrkf3/hold_strong/

Title: How to debunk short theses? aka Why I am massively bullish on RIVN?
Sentiment: neutral
Score: 45
URL: https://reddit.com/r/RIVNstock/comments/1gbn98j/how_to_debunk_short_theses_aka_why_i_am_massively/

Title: RIVN $12 or $9?
Sentiment: neutral
Score: 41
URL: https://reddit.com/r/RIVNstock/comments/1g7pwer/rivn_12_or_9/

Title: RIVN Squeeze Coming!
Sentiment: neutral
Score: 39
URL: https://reddit.com/r/RIVNstock/comments/1gpxzlm/rivn_squeeze_com


