In [1]:
import requests
import pandas as pd
import yfinance as yf
import time
import os
import praw
import re
import datetime
import prawcore
import tweepy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import prawcore.exceptions as pc_exc
import snscrape.modules.twitter as sntwitter
import snscrape.base as sb_exc

# Yahoo Finance Trending Stocks

In [2]:
# HTTP headers sent with the Yahoo Finance trending request made by
# fetch_trending_symbols(). The User-Agent is a desktop Chrome string.
# NOTE(review): presumably needed because Yahoo rejects the default
# `requests` User-Agent — confirm before removing.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/137.0.0.0 Safari/537.36"
    )
}

def filter_to_equities(symbols):
    """Return the members of `symbols` whose Yahoo quote type is "EQUITY".

    Symbols that do not match ``^[A-Za-z0-9]+$`` (e.g. ones containing
    ``^``, ``-``, ``=`` or ``.``) are dropped without any lookup. Every
    remaining symbol costs one ``yf.Ticker(sym).info`` lookup. Input order
    is preserved in the returned list.
    """
    plain_symbol = re.compile(r'^[A-Za-z0-9]+$')

    def _is_equity(symbol):
        # Cheap syntactic rejection first, so no lookup is made for
        # symbols the pattern rules out.
        if plain_symbol.match(symbol) is None:
            return False
        return yf.Ticker(symbol).info.get("quoteType") == "EQUITY"

    return [symbol for symbol in symbols if _is_equity(symbol)]

def fetch_trending_symbols(count=10, timeout=10.0):
    """Step 1: get just the tickers that are trending, filtered to equities.

    Args:
        count: Number of trending quotes to request from Yahoo Finance.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout `requests` can block indefinitely.

    Returns:
        List of ticker symbols that passed `filter_to_equities`. Empty when
        Yahoo returns no trending result set.

    Raises:
        requests.HTTPError: if Yahoo answers with an error status.
        requests.Timeout: if the request exceeds `timeout`.
    """
    url = "https://query1.finance.yahoo.com/v1/finance/trending/US"
    params = {
        "lang": "en-US",
        "region": "US",
        "count": count,
        "corsDomain": "finance.yahoo.com",
    }
    resp = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # An empty or null result list would otherwise fail on the [0] index below.
    results = data["finance"]["result"] or []
    if not results:
        return []

    # drill into the list of quotes; only each quote's 'symbol' is used
    symbols = [q["symbol"] for q in results[0]["quotes"]]
    return filter_to_equities(symbols)

In [3]:
def fetch_top_equities(n, initial_batch=20, max_batch=200, backoff=1.0):
    """
    Build a DataFrame of the first `n` trending equities.

    Each round requests `batch_size` trending symbols, keeps those whose
    quoteType is "EQUITY", and returns as soon as `n` rows are collected.
    If a round comes up short, the batch size is doubled (capped at
    `max_batch`) after sleeping `backoff` seconds. A short round at
    `max_batch` raises RuntimeError.

    Columns: ticker, name, price, change_pct.
    """
    batch_size = initial_batch

    while True:
        rows = []
        for symbol in fetch_trending_symbols(batch_size):
            # NOTE(review): fetch_trending_symbols already filters through
            # filter_to_equities, so this is a second .info lookup per symbol.
            details = yf.Ticker(symbol).info
            if details.get("quoteType") != "EQUITY":
                continue
            rows.append({
                "ticker":     symbol,
                "name":       details.get("shortName"),
                "price":      details.get("regularMarketPrice"),
                "change_pct": details.get("regularMarketChangePercent"),
            })
            if len(rows) >= n:
                break

        if len(rows) >= n:
            return pd.DataFrame(rows[:n])

        # Came up short: give up at the cap, otherwise widen the request.
        if batch_size >= max_batch:
            raise RuntimeError(
                f"Only found {len(rows)} equities in top {batch_size} trending; "
                f"max_batch ({max_batch}) reached."
            )
        batch_size = min(batch_size * 2, max_batch)
        time.sleep(backoff)   # polite pause before re-requesting
    

In [4]:
# Time the end-to-end fetch of the top 20 trending equities.
start_time = time.perf_counter()

# The result is kept in the global `df_equities`, which the Reddit merge cell reads.
df_equities = fetch_top_equities(20, initial_batch=20, max_batch=200)
print(df_equities)
end_time = time.perf_counter()
# Elapsed time is the difference of the two perf_counter() readings.
print(f"Results took: {end_time - start_time}")

   ticker                             name      price  change_pct
0    LCID                Lucid Group, Inc.     3.1150   36.026203
1     AMC  AMC Entertainment Holdings, Inc     3.5150   10.534593
2     PEP                    Pepsico, Inc.   145.4956    7.495825
3     NIO                         NIO Inc.     4.4050    7.177617
4    RIVN          Rivian Automotive, Inc.    12.9500    4.519770
5    BBAI                 BigBear.ai, Inc.     8.0350   12.851124
6      QS         QuantumScape Corporation    13.3450   17.577091
7    PLTR       Palantir Technologies Inc.   154.1050    2.117151
8     ABT              Abbott Laboratories   121.3900   -7.856388
9    NVDA               NVIDIA Corporation   173.3550    1.158313
10    TSM  Taiwan Semiconductor Manufactur   247.7550    4.291551
11   NFLX                    Netflix, Inc.  1271.9950    1.734365
12    PGY         Pagaya Technologies Ltd.    29.6800   26.136843
13   MSFT            Microsoft Corporation   512.2650    1.314232
14    LLY 

# Social Media Sentiment Dependencies

In [5]:
# praw -> Python Reddit API Wrapper.
# Credentials are read from environment variables; a missing variable raises
# KeyError when this cell runs.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent=os.environ["REDDIT_USER_AGENT"],
)

# tweepy -> Twitter API client.
# NOTE(review): in the visible notebook `twitter_client` is only referenced by
# commented-out code, yet TWITTER_BEARER_TOKEN must still be set for this cell
# to run — confirm whether the client is still needed.
twitter_client = tweepy.Client(bearer_token=os.environ["TWITTER_BEARER_TOKEN"])

# Expanded keyword lists for sentiment detection.
# The sentiment functions lowercase the text and test `keyword in text`, so:
#   * entries must be lowercase to ever match;
#   * matching is plain substring containment, so short entries can also
#     match inside longer words.

BULLISH_KEYWORDS = [
    # Actions / Verbs
    "buy", "buying", "bought", "accumulate", "loading up",
    # Slang & Slogans
    "long", "diamond hands", "💎🙌", "rocket", "🚀",
    "to the moon", "moon", "moonshot", "rip", "pump",
    # Adjectives & Calls
    "bull", "bullish", "green", "green days", "breakout", "squeeze",
    # FOMO & Fear-of-Missing-Out
    "fomo", "can’t miss", "cant miss", "don’t sleep on", "dont sleep on"
]

# NOTE(review): "short squeeze" contains both "short" (this list) and
# "squeeze" (bullish list), so text with that phrase is flagged as both.
BEARISH_KEYWORDS = [
    # Actions / Verbs
    "sell", "selling", "sold", "dump", "dumping", "liquidate", "offload", "plummet",
    # Slang & Slogans
    "short", "shorted", "short squeeze", "paper hands",
    # Adjectives & Warnings
    "bear", "bearish", "red", "crash", "crashing", "plunge", "bleed", "weak",
    # Financial Distress
    "bankrupt", "margin call", "stop loss", "capitulation", "bagholder",
    # Emojis
    "📉", "🔻", "😱", "😢", "💀"
]
# Subreddits searched for ticker mentions.
STOCK_SUBREDDITS = [
    "stocks",
    "investing",
    "StockMarket",
    "wallstreetbets",
    "pennystocks",
    "robinhood"
]
# Single "+"-joined name passed to reddit.subreddit() so one search spans all of them.
ALL_SUBS = "+".join(STOCK_SUBREDDITS)

In [6]:
# # def detect_sentiment(text):
# #     t = text.lower()
# #     bullish = any(k in t for k in BULLISH_KEYWORDS)
# #     bearish = any(k in t for k in BEARISH_KEYWORDS)
# #     return bullish, bearish

# def fetch_reddit_stock_sentiment(ticker, total_limit=500, max_comments=20):
#     """
#     Fetch up to 30 days back total_limit posts for $TICKER across STOCK_SUBREDDITS,
#     include up to max_comments per post, and flag bullish/bearish.
#     Returns agg_dict.
#     """
#     # 30 days in sec
#     cutoff = (time.time() - 30 * 24 * 3600)
#     query = f"${ticker} lang:en"
#     records = []
#     comment_count = 0
#     bullish_count = 0
#     bearish_count = 0

#     clean = "".join(ch for ch in ticker if ch.isalnum()).upper()
#     # match either with or without the $ prefix
#     query = f'"${clean}" OR "{clean}"'
#     for post in reddit.subreddit(ALL_SUBS).search(query,
#                                                  limit=total_limit,
#                                                  sort="new"):
#         # grab post text
#         if post.created_utc > cutoff:
#             break        # all remaining posts will be older
        
#         pieces = [post.title or "", post.selftext or ""]
        
#         # grab comments (up to max_comments)
#         post.comments.replace_more(limit=0)
#         flat_comments = post.comments.list()[:max_comments]
#         pieces += [c.body for c in flat_comments]
        
#         full_text = " ".join(pieces)
#         # is_bullish, is_bearish = detect_sentiment(full_text)

#         for c in flat_comments:
#             text = c.body.lower()
#             # print(text)
#             comment_count += 1
#             if any(k in text for k in BULLISH_KEYWORDS):
#                 bullish_count += 1
#             if any(k in text for k in BEARISH_KEYWORDS):
#                 bearish_count += 1
        
#             # print("bullish: " + str(bullish_count))
#             # print("bearish: " + str(bearish_count))
        
#         # print(full_text)
#         records.append({
#             "ticker":      ticker,
#             "subreddit":   post.subreddit.display_name,
#             "created_utc": datetime.datetime.utcfromtimestamp(post.created_utc),
#             "bullish":     bullish_count,
#             "bearish":     bearish_count,
#             # optional for debugging:
#             # "raw_text": full_text[:200]  
#         })
#     df = pd.DataFrame(records)
#     # display(df)
#     # build your aggregate metrics
#     agg = {
#         "ticker": ticker,
#         "reddit_mentions": len(df),
#         "reddit_bullish":  int(df["bullish"].sum()) if not df.empty else 0,
#         "reddit_bearish":  int(df["bearish"].sum()) if not df.empty else 0,
#     }
#     return agg


In [7]:
# def fetch_reddit_sentiment_for_all_equities(
#     df_equities,
#     total_limit=200,
#     max_comments=50
# ):
#     """
#     For each ticker in df_equities['ticker'], call
#     fetch_reddit_comment_sentiment() to get:
#       - reddit_comment_count
#       - reddit_comment_bullish
#       - reddit_comment_bearish

#     Then rename those last two to bullish_count/bearish_count
#     and merge them back into df_equities.
#     """
#     # 1) Gather all metrics into a list of dicts
#     records = []
#     for ticker in df_equities['ticker']:
#         try:
#             result = fetch_reddit_stock_sentiment(ticker)
#         except Exception:
#             print(Exception)
#             result = {
#                 "reddit_comment_count":   0,
#                 "reddit_com ment_bullish": 0,
#                 "reddit_comment_bearish": 0,
#             }
#         records.append(result)
    
#     # 2) Build a DataFrame of raw metrics
#     df_metrics = pd.DataFrame(records)
#     display(df_metrics)
    
#     # 3) Merge by direct assignment (avoids any KeyError on slicing)
#     df = df_equities.reset_index(drop=True).copy()
#     df["reddit_bullish_count"] = df_metrics["reddit_bullish"]
#     df["reddit_bearish_count"] = df_metrics["reddit_bearish"]

#     return df

# # Usage:

# # print(df_equities)
# df_with_reddit = fetch_reddit_sentiment_for_all_equities(df_equities)


# Reddit Sentiment Analysis

In [8]:
def zero_metrics_for_reddit(ticker):
    """Return all-zero Reddit metrics for `ticker`.

    Used as the fallback when the Reddit search for a ticker fails. The keys
    (and their order) mirror the dicts that fetch_reddit_stock_sentiment
    builds on success, so failed and successful tickers line up in the same
    DataFrame columns.

    Returns:
        (full_metrics, sentiment_metrics): the complete metrics dict and the
        reduced dict holding only the average sentiment and the two ratios.
    """
    full_empty = {
        "ticker":                          ticker,
        "reddit_post_count":               0,
        "reddit_post_bullish":             0,
        "reddit_post_bearish":             0,
        "reddit_comment_count":            0,
        "reddit_comment_bullish":          0,
        "reddit_comment_bearish":          0,
        # Key names must end in "bullish"/"bearish" to match the success path.
        "reddit_weighted_comment_bullish": 0,
        "reddit_weighted_comment_bearish": 0,
        "reddit_avg_comment_sentiment":    0.0,
        "reddit_bullish_ratio":            0.0,
        "reddit_bearish_ratio":            0.0,
    }
    # Floats here so the column dtype matches the rounded floats of real results.
    partial_empty = {
        "ticker":                       ticker,
        "reddit_avg_comment_sentiment": 0.0,
        "reddit_bullish_ratio":         0.0,
        "reddit_bearish_ratio":         0.0,
    }

    return full_empty, partial_empty
    
    

In [9]:
# Module-level VADER analyzer shared by the Reddit and Twitter sentiment functions.
# NOTE(review): presumably requires the NLTK "vader_lexicon" resource to be
# installed locally; no download call is visible in this notebook — confirm.
sia = SentimentIntensityAnalyzer()

def fetch_reddit_stock_sentiment(ticker: str,
                                 company_name: str = None,
                                 total_limit: int = 500,
                                 max_comments: int = 20):
    """
    Collect Reddit sentiment signals for a ticker over the past 30 days.

    Searches ALL_SUBS (newest first) for the cashtag, the bare ticker, or the
    company name, then scores each post and up to `max_comments` of its
    comments.

    Args:
        ticker: Ticker symbol; non-alphanumeric characters are stripped and
            the rest upper-cased for the query.
        company_name: Optional company name. When None or blank, the name
            clause is left out of the query.
        total_limit: Maximum number of search results to request.
        max_comments: Maximum number of comments examined per post.

    Returns:
        (full_metrics, sentiment_metrics), both dicts keyed by "ticker".
        full_metrics holds:
          - reddit_post_count / reddit_post_bullish / reddit_post_bearish
          - reddit_comment_count / reddit_comment_bullish / reddit_comment_bearish
          - reddit_weighted_comment_bullish / reddit_weighted_comment_bearish
          - reddit_avg_comment_sentiment
          - reddit_bullish_ratio / reddit_bearish_ratio
        sentiment_metrics holds only the last three values.
        On a Reddit BadRequest or ServerError, the result of
        zero_metrics_for_reddit(ticker) is returned instead; counts gathered
        before the error are discarded.

    Note:
        Keywords are matched as lowercase substrings, so a single text can be
        counted as both bullish and bearish.
    """
    # 1) 30-day cutoff (epoch seconds)
    cutoff = time.time() - 30 * 24 * 3600

    # 2) Build a query matching the cashtag, the bare ticker, and (if given)
    #    the company name.
    clean_tkr = "".join(ch for ch in ticker if ch.isalnum()).upper()
    clean_nm = "".join(
        ch for ch in (company_name or "") if ch.isalnum() or ch.isspace()
    ).upper()
    query_parts = [f'"${clean_tkr}"', f'"{clean_tkr}"']
    if clean_nm.strip():
        # Skip the clause entirely rather than searching for an empty phrase.
        query_parts.append(f'"{clean_nm}"')
    query = " OR ".join(query_parts)

    # 3) Initialize counters
    post_count = comment_count = 0
    post_bull = post_bear = 0
    comment_bull = comment_bear = 0
    weighted_comment_bull = weighted_comment_bear = 0
    total_compound = 0.0

    try:
        for post in reddit.subreddit(ALL_SUBS).search(
            query, limit=total_limit, sort="new"
        ):
            # Results are requested newest-first, so stop at the first post
            # that falls outside the 30-day window.
            if post.created_utc < cutoff:
                break
            post_count += 1

            # 4) Post-level sentiment (title + body, keyword substring match)
            post_text = (post.title + " " + getattr(post, "selftext", "")).lower()
            if any(k in post_text for k in BULLISH_KEYWORDS):
                post_bull += 1
            if any(k in post_text for k in BEARISH_KEYWORDS):
                post_bear += 1

            # 5) Gather comments: drop unresolved "more comments" stubs, then
            #    take the first `max_comments` of the flattened tree.
            post.comments.replace_more(limit=0)
            comments = post.comments.list()[:max_comments]
            for c in comments:
                # skip old comments
                if c.created_utc < cutoff:
                    continue

                comment_count += 1
                txt = c.body
                low = txt.lower()
                is_bull = any(k in low for k in BULLISH_KEYWORDS)
                is_bear = any(k in low for k in BEARISH_KEYWORDS)

                # simple counts
                comment_bull += int(is_bull)
                comment_bear += int(is_bear)

                # weight by the comment's own score (as a proxy for visibility)
                score = getattr(c, "score", 0)
                weighted_comment_bull += score if is_bull else 0
                weighted_comment_bear += score if is_bear else 0

                # VADER compound score on the original-case text
                total_compound += sia.polarity_scores(txt)["compound"]

    except prawcore.exceptions.BadRequest as e:
        print(f"⚠️ Reddit rejected the search for {ticker}: {e}")
        return zero_metrics_for_reddit(ticker)
    except prawcore.exceptions.ServerError as e:
        print(f"⚠️ Reddit server error for {ticker}: {e}")
        return zero_metrics_for_reddit(ticker)

    # 6) Final derived metrics. The ratios are taken over posts + comments, so
    #    the zero guard must use that same total: posts without any in-window
    #    comments still contribute.
    avg_compound = total_compound / comment_count if comment_count else 0.0
    total_items = post_count + comment_count
    bullish_ratio = (post_bull + comment_bull) / total_items if total_items else 0.0
    bearish_ratio = (post_bear + comment_bear) / total_items if total_items else 0.0

    full_reddit_df = {
        "ticker": ticker,
        "reddit_post_count": post_count,
        "reddit_post_bullish": post_bull,
        "reddit_post_bearish": post_bear,
        "reddit_comment_count": comment_count,
        "reddit_comment_bullish": comment_bull,
        "reddit_comment_bearish": comment_bear,
        "reddit_weighted_comment_bullish": weighted_comment_bull,
        "reddit_weighted_comment_bearish": weighted_comment_bear,
        "reddit_avg_comment_sentiment": round(avg_compound, 4),
        "reddit_bullish_ratio": round(bullish_ratio, 4),
        "reddit_bearish_ratio": round(bearish_ratio, 4)
    }

    # Reduced view: the same values, restricted to the summary columns.
    sentiment_keys = (
        "ticker",
        "reddit_avg_comment_sentiment",
        "reddit_bullish_ratio",
        "reddit_bearish_ratio",
    )
    reddit_sentiment_df = {key: full_reddit_df[key] for key in sentiment_keys}

    return full_reddit_df, reddit_sentiment_df


In [10]:
def merge_reddit(df):
    """Attach Reddit sentiment metrics to each row of `df`.

    Calls fetch_reddit_stock_sentiment once per (ticker, name) row and joins
    the results back onto `df` by the "ticker" column.

    Args:
        df: DataFrame with at least "ticker" and "name" columns.

    Returns:
        (df_full_reddit_metrics, df_sentiment_metrics): `df` joined with the
        full metric set, and `df` joined with only the summary sentiment
        columns. `df` itself is not modified. When `df` has no rows, two
        plain copies of `df` are returned.
    """
    sentiment_records = []
    full_records = []
    for ticker, name in zip(df["ticker"], df["name"]):
        full_metrics, sentiment_metrics = fetch_reddit_stock_sentiment(
            ticker,
            company_name=name,
            total_limit=500,
            max_comments=20
        )
        sentiment_records.append(sentiment_metrics)
        full_records.append(full_metrics)

    if not full_records:
        # pd.DataFrame([]) has no "ticker" column, so set_index would raise
        # KeyError; there is nothing to join anyway.
        return df.copy(), df.copy()

    df_full = pd.DataFrame(full_records).set_index("ticker")
    df_full_reddit_metrics = df.join(df_full, on="ticker", how="left")

    df_metrics = pd.DataFrame(sentiment_records).set_index("ticker")
    df_sentiment_metrics = df.join(df_metrics, on="ticker", how="left")

    return df_full_reddit_metrics, df_sentiment_metrics


In [11]:
# Time the Reddit sentiment pass over every ticker in df_equities.
start_time = time.perf_counter()

# merge_reddit returns two frames: the full metric set and the reduced sentiment view.
df_full_reddit, df_sentiment_reddit = merge_reddit(df_equities)

end_time = time.perf_counter()
print(f"Results took: {end_time - start_time}")

Results took: 202.2235278999433


In [12]:
# Show the equity columns joined with every Reddit metric.
display(df_full_reddit)

Unnamed: 0,ticker,name,price,change_pct,reddit_post_count,reddit_post_bullish,reddit_post_bearish,reddit_comment_count,reddit_comment_bullish,reddit_comment_bearish,reddit_weighted_comment_bullish,reddit_weighted_comment_bearish,reddit_avg_comment_sentiment,reddit_bullish_ratio,reddit_bearish_ratio
0,LCID,"Lucid Group, Inc.",3.115,36.026203,4,3,2,55,19,26,443,607,0.1925,0.3729,0.4746
1,AMC,"AMC Entertainment Holdings, Inc",3.515,10.534593,3,2,3,29,5,7,8,12,0.3486,0.2188,0.3125
2,PEP,"Pepsico, Inc.",145.4956,7.495825,9,4,5,114,16,20,478,379,-0.0197,0.1626,0.2033
3,NIO,NIO Inc.,4.405,7.177617,6,5,3,100,11,19,69,284,0.0143,0.1509,0.2075
4,RIVN,"Rivian Automotive, Inc.",12.95,4.51977,0,0,0,0,0,0,0,0,0.0,0.0,0.0
5,BBAI,"BigBear.ai, Inc.",8.035,12.851124,6,5,4,97,15,30,299,389,0.2611,0.1942,0.3301
6,QS,QuantumScape Corporation,13.345,17.577091,10,7,4,140,25,33,566,183,0.1639,0.2133,0.2467
7,PLTR,Palantir Technologies Inc.,154.105,2.117151,49,31,21,816,162,172,3594,3666,0.1508,0.2231,0.2231
8,ABT,Abbott Laboratories,121.39,-7.856388,12,4,7,113,19,19,488,357,0.0098,0.184,0.208
9,NVDA,NVIDIA Corporation,173.355,1.158313,135,80,76,2109,426,445,24928,23864,0.1587,0.2255,0.2322


In [13]:
# display(df_equities)
# Show the reduced view: equity columns plus the Reddit average sentiment and the two ratios.
display(df_sentiment_reddit)

Unnamed: 0,ticker,name,price,change_pct,reddit_avg_comment_sentiment,reddit_bullish_ratio,reddit_bearish_ratio
0,LCID,"Lucid Group, Inc.",3.115,36.026203,0.1925,0.3729,0.4746
1,AMC,"AMC Entertainment Holdings, Inc",3.515,10.534593,0.3486,0.2188,0.3125
2,PEP,"Pepsico, Inc.",145.4956,7.495825,-0.0197,0.1626,0.2033
3,NIO,NIO Inc.,4.405,7.177617,0.0143,0.1509,0.2075
4,RIVN,"Rivian Automotive, Inc.",12.95,4.51977,0.0,0.0,0.0
5,BBAI,"BigBear.ai, Inc.",8.035,12.851124,0.2611,0.1942,0.3301
6,QS,QuantumScape Corporation,13.345,17.577091,0.1639,0.2133,0.2467
7,PLTR,Palantir Technologies Inc.,154.105,2.117151,0.1508,0.2231,0.2231
8,ABT,Abbott Laboratories,121.39,-7.856388,0.0098,0.184,0.208
9,NVDA,NVIDIA Corporation,173.355,1.158313,0.1587,0.2255,0.2322


# Twitter Sentiment Analysis

In [14]:
# def fetch_twitter_stock_sentiment(ticker, company_name=None, max_results=100):
#     clean = "".join(ch for ch in ticker if ch.isalnum()).upper()
#     query_tkr = f'"${clean}" OR "{clean}"'
#     if company_name:
#         clean_nm = "".join(ch for ch in company_name if ch.isalnum() or ch.isspace())
#         query_tkr += f' OR "{clean_nm}"'
#     query = f"{query_tkr} lang:en -is:retweet"

#     tweets = twitter_client.search_recent_tweets(query,
#                                         tweet_fields=["text", "public_metrics"],
#                                         max_results=max_results).data or []

#     mentions = len(tweets)
#     tweets_count = bull = bear = 0
#     total_compound = 0.0
    

#     for t in tweets:
#         text = t.text
#         low = text.lower()
#         is_bull = any(k in low for k in BULLISH_KEYWORDS)
#         is_bear = any(k in low for k in BEARISH_KEYWORDS)
#         bull += int(is_bull)
#         bear += int(is_bear)
#         tweets_count += 1
#         total_compound += sia.polarity_scores(text)["compound"]
    
#     avg_compound = total_compound / mentions if mentions else 0.0
#     bearish_ratio = bear/tweets_count
#     bullish_ratio = bull/tweets_count
    
#     full_twitter_df = {
#         "ticker": ticker,
#         "twitter_mentions": mentions,
#         "twitter_bullish_count": bull,
#         "twitter_bearish_count": bear,
#         "twitter_avg_sentiment": round(avg_compound, 4),
#         "twitter_bullish_ratio": round(bullish_ratio, 4),
#         "twitter_bearish_ratio": round(bearish_ratio, 4)
#     }

#     twitter_sentiment_df = {
#         "ticker": ticker,
#         "twitter_avg_sentiment": round(avg_compound, 4),
#         "twitter_bullish_ratio": round(bullish_ratio, 4),
#         "twitter_bearish_ratio": round(bearish_ratio, 4)
#     }

#     return full_twitter_df, twitter_sentiment_df
    

In [15]:
def fetch_twitter_stock_sentiment(
    ticker: str,
    company_name: str = None,
    max_results: int = 100,
    retry_attempts: int = 2,
    backoff_seconds: float = 5.0
):
    """
    Uses snscrape to fetch tweets matching $TICKER, TICKER, or COMPANY_NAME
    and scores them with the keyword lists and VADER.

    Args:
        ticker: Ticker symbol; non-alphanumeric characters are stripped and
            the rest upper-cased for the query.
        company_name: Optional company name added to the query as a phrase.
        max_results: Maximum number of tweets to score.
        retry_attempts: Number of retries after a ScraperException, i.e. up
            to ``retry_attempts + 1`` tries in total. A negative value means
            no try is made at all.
        backoff_seconds: Fixed pause between tries.

    Returns:
        (full_metrics_dict, sentiment_metrics_dict). Always a tuple: zeroed
        metrics are returned when no tweets are found, when every try fails,
        or when no try is made.
    """
    # Build the query
    clean = "".join(ch for ch in ticker if ch.isalnum()).upper()
    parts = [f'"${clean}"', f'"{clean}"']
    if company_name:
        clean_nm = "".join(ch for ch in company_name if ch.isalnum() or ch.isspace())
        parts.append(f'"{clean_nm}"')
    query = " OR ".join(parts) + " lang:en"

    sentiment_keys = (
        "ticker",
        "twitter_avg_sentiment",
        "twitter_bullish_ratio",
        "twitter_bearish_ratio",
    )

    def with_sentiment_view(full):
        # The sentiment dict is the full dict restricted to the summary keys.
        return full, {key: full[key] for key in sentiment_keys}

    def zero_dict():
        return with_sentiment_view({
            "ticker":                ticker,
            "twitter_mentions":      0,
            "twitter_bullish_count": 0,
            "twitter_bearish_count": 0,
            "twitter_avg_sentiment": 0.0,
            "twitter_bullish_ratio": 0.0,
            "twitter_bearish_ratio": 0.0,
        })

    total_tries = retry_attempts + 1
    for attempt in range(1, total_tries + 1):
        try:
            # Counters are reset on every try so a failed partial scrape
            # never leaks into the next one.
            tweet_count = bull = bear = 0
            total_compound = 0.0

            for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
                if i >= max_results:
                    break
                text = tweet.content
                low = text.lower()
                is_bull = any(k in low for k in BULLISH_KEYWORDS)
                is_bear = any(k in low for k in BEARISH_KEYWORDS)

                tweet_count += 1
                bull += int(is_bull)
                bear += int(is_bear)
                total_compound += sia.polarity_scores(text)["compound"]

            if tweet_count == 0:
                return zero_dict()

            return with_sentiment_view({
                "ticker":                ticker,
                "twitter_mentions":      tweet_count,
                "twitter_bullish_count": bull,
                "twitter_bearish_count": bear,
                "twitter_avg_sentiment": round(total_compound / tweet_count, 4),
                "twitter_bullish_ratio": round(bull / tweet_count, 4),
                "twitter_bearish_ratio": round(bear / tweet_count, 4),
            })

        except sb_exc.ScraperException as e:
            if attempt >= total_tries:
                print(f"❌ snscrape failed for {ticker} after {attempt} attempts: {e}")
                return zero_dict()
            print(f"⚠️ snscrape error for {ticker}, retry {attempt}/{retry_attempts} after {backoff_seconds}s...")
            time.sleep(backoff_seconds)

    # Reached only when no try was allowed (negative retry_attempts); callers
    # unpack a tuple, so never fall through to an implicit None.
    return zero_dict()


In [16]:
def merge_twitter(df):
    """Attach Twitter sentiment metrics to each row of `df`.

    Calls fetch_twitter_stock_sentiment (max_results=10) once per
    (ticker, name) row, joins the results back onto `df` by the "ticker"
    column, and prints the elapsed time.

    Args:
        df: DataFrame with at least "ticker" and "name" columns.

    Returns:
        (df_full_twitter_metrics, df_twitter_sentiment_metrics): `df` joined
        with the full metric set, and `df` joined with only the summary
        sentiment columns. `df` itself is not modified. When `df` has no
        rows, two plain copies of `df` are returned.
    """
    start_time = time.perf_counter()
    sentiment_records = []
    full_records = []
    for ticker, name in zip(df["ticker"], df["name"]):
        full_metrics, sentiment_metrics = fetch_twitter_stock_sentiment(
            ticker,
            company_name=name,
            max_results=10
        )
        sentiment_records.append(sentiment_metrics)
        full_records.append(full_metrics)

    if full_records:
        df_full = pd.DataFrame(full_records).set_index("ticker")
        df_full_twitter_metrics = df.join(df_full, on="ticker", how="left")

        df_metrics = pd.DataFrame(sentiment_records).set_index("ticker")
        df_twitter_sentiment_metrics = df.join(df_metrics, on="ticker", how="left")
    else:
        # pd.DataFrame([]) has no "ticker" column, so set_index would raise
        # KeyError; there is nothing to join anyway.
        df_full_twitter_metrics = df.copy()
        df_twitter_sentiment_metrics = df.copy()

    end_time = time.perf_counter()
    print(f"Results took: {end_time - start_time}")
    return df_full_twitter_metrics, df_twitter_sentiment_metrics

In [17]:
# start_time = time.perf_counter()

# df_full_twitter, df_sm_sentiment = merge_twitter(df_equities)

# end_time = time.perf_counter()
# print(f"Results took: {end_time - start_time}")

In [18]:
# Re-display the Reddit sentiment summary frame (the same frame shown earlier in the notebook).
display(df_sentiment_reddit)

Unnamed: 0,ticker,name,price,change_pct,reddit_avg_comment_sentiment,reddit_bullish_ratio,reddit_bearish_ratio
0,LCID,"Lucid Group, Inc.",3.115,36.026203,0.1925,0.3729,0.4746
1,AMC,"AMC Entertainment Holdings, Inc",3.515,10.534593,0.3486,0.2188,0.3125
2,PEP,"Pepsico, Inc.",145.4956,7.495825,-0.0197,0.1626,0.2033
3,NIO,NIO Inc.,4.405,7.177617,0.0143,0.1509,0.2075
4,RIVN,"Rivian Automotive, Inc.",12.95,4.51977,0.0,0.0,0.0
5,BBAI,"BigBear.ai, Inc.",8.035,12.851124,0.2611,0.1942,0.3301
6,QS,QuantumScape Corporation,13.345,17.577091,0.1639,0.2133,0.2467
7,PLTR,Palantir Technologies Inc.,154.105,2.117151,0.1508,0.2231,0.2231
8,ABT,Abbott Laboratories,121.39,-7.856388,0.0098,0.184,0.208
9,NVDA,NVIDIA Corporation,173.355,1.158313,0.1587,0.2255,0.2322


In [None]:
# NOTE(review): `df_sm_sentiment` is only assigned by the
# merge_twitter(df_equities) call in the preceding cell, which is currently
# commented out, so on a fresh kernel this line raises NameError.
# Re-enable that cell or remove this one.
display(df_sm_sentiment)