<a href="https://colab.research.google.com/github/Arjun-Nautiyal/Reddit-ML-Model/blob/main/CapX3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install praw
import praw
import pandas as pd
from datetime import datetime, timedelta
import yfinance as yf
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np

# Initialize Reddit API
reddit = praw.Reddit(
    client_id='TE8_ZwtV8T6El1wiXZfdSQ',
    client_secret='1y4f97KOW_Gk9udrSm4g7LIBWovR2w',
    user_agent='stockScraper by /u/YourRedditUsername'
)

# Subreddits to scrape
subreddits = ["stocks", "wallstreetbets", "investing", "StockMarket", "finance"]
posts = []

# Scrape posts from each subreddit
for subreddit_name in subreddits:
    subreddit = reddit.subreddit(subreddit_name)
    print(f"Scraping data from r/{subreddit_name}...")
    for post in subreddit.top(limit=2000):
        if post.score > 5 and post.num_comments > 3:
            posts.append({
                'subreddit': subreddit_name,
                'title': post.title,
                'score': post.score,
                'num_comments': post.num_comments,
                'created': datetime.fromtimestamp(post.created),
                'content': post.selftext
            })

# Convert to DataFrame
df = pd.DataFrame(posts)
print(f"Data scraped: {df.shape[0]} rows")

# Expand tickers to include contextual matches and validate
def extract_ticker(text):
    import re
    tickers = re.findall(r'\$[A-Za-z]{1,5}', text)
    return [ticker.strip('$') for ticker in tickers] if tickers else None

def validate_ticker_with_context(text):
    tickers = extract_ticker(text)
    valid_tickers = ["AAPL", "TSLA", "MSFT", "AMZN", "GOOG", "GME", "AMC", "BB", "SLV", "SPCE", "HON", "DB", "KHC"]
    if not tickers:
        if "steel" in text.lower():
            return ["X"]
        if "honeywell" in text.lower():
            return ["HON"]
    return [t for t in tickers if t in valid_tickers] if tickers else None

df['tickers'] = df['content'].apply(validate_ticker_with_context)
print(f"Rows with valid tickers: {df.dropna(subset=['tickers']).shape[0]}")

# Load FinBERT model for sentiment analysis
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
finbert_model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

def compute_finbert_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
    outputs = finbert_model(**inputs)
    sentiment_scores = outputs.logits.softmax(dim=-1)
    sentiment = sentiment_scores.argmax().item()  # 0=Negative, 1=Neutral, 2=Positive
    return 1 if sentiment == 2 else 0

df['sentiment_movement'] = df['content'].fillna('').apply(compute_finbert_sentiment)

# Determine timeframe dynamically based on post content
def determine_timeframe(content):
    if "next week" in content.lower():
        return "7d"
    elif "this month" in content.lower():
        return "30d"
    elif "next year" in content.lower() or "this year" in content.lower():
        return "365d"
    else:
        return "7d"  # Default timeframe

# Fetch stock data with retry logic
def get_stock_movement(ticker, post_date, timeframe):
    post_date_str = post_date.strftime('%Y-%m-%d')
    end_date = post_date + timedelta(days=1 if timeframe == "1d" else 3 if timeframe == "3d" else 7 if timeframe == "7d" else 30 if timeframe == "30d" else 365)
    stock = yf.Ticker(ticker)
    try:
        history = stock.history(start=post_date_str, end=end_date.strftime('%Y-%m-%d'))
        if not history.empty:
            open_price = history['Open'].iloc[0]
            close_price = history['Close'].iloc[-1]
            return 1 if close_price > open_price else 0
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
    return None

def fetch_dynamic_movements(row):
    timeframe = determine_timeframe(row['content'])
    if row['tickers'] and len(row['tickers']) > 0:
        return timeframe, get_stock_movement(row['tickers'][0], row['created'], timeframe)
    return timeframe, None

# Apply fetching logic with dynamic timeframes
df['timeframe'], df['actual_movement_dynamic'] = zip(*df.apply(fetch_dynamic_movements, axis=1))

# Validate sentiment predictions dynamically
df['validation_dynamic'] = df['sentiment_movement'] == df['actual_movement_dynamic']

# Compute and print results
output_lines = []
for idx, row in df.iterrows():
    ticker = row['tickers'][0] if row['tickers'] else "N/A"
    post = row['content']
    timeframe = row['timeframe']
    predicted = "Up" if row['sentiment_movement'] == 1 else "Down"
    actual = "Up" if row['actual_movement_dynamic'] == 1 else "Down"
    accuracy = "Correct" if row['validation_dynamic'] else "Incorrect"

    line = (f"Post: {post}\n"
            f"Ticker: {ticker}\n"
            f"Timeframe: {timeframe}\n"
            f"Predicted Movement: {predicted}\n"
            f"Actual Movement: {actual}\n"
            f"Accuracy: {accuracy}\n"
            "-" * 50)
    output_lines.append(line)
    print(line)

# Save individual results to file
with open("reddit_stock_analysis_results.txt", "w") as f:
    f.write("\n".join(output_lines))

# Overall dynamic accuracy
if len(df['validation_dynamic']) > 0:
    dynamic_accuracy = df['validation_dynamic'].mean()
    print(f"Dynamic Accuracy: {dynamic_accuracy * 100:.2f}%")
else:
    print("Dynamic Accuracy: No valid data to compute.")

# Save results to CSV
df.to_csv("reddit_stock_analysis_dynamic.csv", index=False)
print("Results saved to reddit_stock_analysis_dynamic.csv")


Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting prawcore<3,>=2.4 (from praw)
  Downloading prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Collecting update_checker>=0.18 (from praw)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading praw-7.8.1-py3-none-any.whl (189 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading prawcore-2.4.0-py3-none-any.whl (17 kB)
Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Installing collected packages: update_checker, prawcore, praw
Successfully installed praw-7.8.1 prawcore-2.4.0 update_checker-0.18.0


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Scraping data from r/stocks...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/wallstreetbets...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/investing...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/StockMarket...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/finance...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Data scraped: 4786 rows
Rows with valid tickers: 95


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post: 
Ticker: N/A
Timeframe: 7d
Predicted Movement: Down
Actual Movement: Down
Accuracy: Incorrect
-Post:

In [None]:
##Trying to get ticker name and all the metrics of erformance

In [None]:
!pip install praw
import praw
import pandas as pd
from datetime import datetime, timedelta
import yfinance as yf
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Load tickers and company names from the provided file
constituents_csv = "/content/constituents.csv"
company_df = pd.read_csv(constituents_csv)
company_dict = {row['Security'].lower(): row['Symbol'] for _, row in company_df.iterrows()}

# Initialize Reddit API
reddit = praw.Reddit(
    client_id='TE8_ZwtV8T6El1wiXZfdSQ',
    client_secret='1y4f97KOW_Gk9udrSm4g7LIBWovR2w',
    user_agent='stockScraper by /u/YourRedditUsername'
)

# Subreddits to scrape
subreddits = ["stocks", "wallstreetbets", "investing", "StockMarket", "finance"]
posts = []

# Scrape posts from each subreddit
for subreddit_name in subreddits:
    subreddit = reddit.subreddit(subreddit_name)
    print(f"Scraping data from r/{subreddit_name}...")
    for post in subreddit.top(limit=2000):
        if post.score > 5 and post.num_comments > 3:
            posts.append({
                'subreddit': subreddit_name,
                'title': post.title,
                'score': post.score,
                'num_comments': post.num_comments,
                'created': datetime.fromtimestamp(post.created),
                'content': post.selftext
            })

# Convert to DataFrame
df = pd.DataFrame(posts)
print(f"Data scraped: {df.shape[0]} rows")

# Ticker extraction and validation

def extract_ticker_with_context(text):
    import re
    tickers = re.findall(r'\b[A-Z]{1,5}\b', text)  # Extract potential tickers
    companies = [word.lower() for word in text.split() if word.lower() in company_dict]
    matched_tickers = list({company_dict[company] for company in companies})
    return list(set(tickers).intersection(company_dict.values())) + matched_tickers

df['tickers'] = df['content'].apply(lambda x: extract_ticker_with_context(str(x)))

# Validate and count rows with tickers
print(f"Rows with valid tickers: {df.dropna(subset=['tickers']).shape[0]}")

# Load FinBERT model for sentiment analysis
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
finbert_model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

def compute_finbert_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
    outputs = finbert_model(**inputs)
    sentiment_scores = outputs.logits.softmax(dim=-1)
    sentiment = sentiment_scores.argmax().item()  # 0=Negative, 1=Neutral, 2=Positive
    return 1 if sentiment == 2 else 0

df['sentiment_movement'] = df['content'].fillna('').apply(compute_finbert_sentiment)

# Determine timeframe dynamically based on post content
def determine_timeframe(content):
    if "next week" in content.lower():
        return "7d"
    elif "this month" in content.lower():
        return "30d"
    elif "next year" in content.lower() or "this year" in content.lower():
        return "365d"
    else:
        return "7d"  # Default timeframe

# Fetch stock data with retry logic
def get_stock_movement(ticker, post_date, timeframe):
    post_date_str = post_date.strftime('%Y-%m-%d')
    end_date = post_date + timedelta(days=1 if timeframe == "1d" else 3 if timeframe == "3d" else 7 if timeframe == "7d" else 30 if timeframe == "30d" else 365)
    stock = yf.Ticker(ticker)
    try:
        history = stock.history(start=post_date_str, end=end_date.strftime('%Y-%m-%d'))
        if not history.empty:
            open_price = history['Open'].iloc[0]
            close_price = history['Close'].iloc[-1]
            return 1 if close_price > open_price else 0
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
    return None

def fetch_dynamic_movements(row):
    timeframe = determine_timeframe(row['content'])
    if row['tickers'] and len(row['tickers']) > 0:
        return timeframe, get_stock_movement(row['tickers'][0], row['created'], timeframe)
    return timeframe, None

# Apply fetching logic with dynamic timeframes
df['timeframe'], df['actual_movement_dynamic'] = zip(*df.apply(fetch_dynamic_movements, axis=1))

# Validate sentiment predictions dynamically
df['validation_dynamic'] = df['sentiment_movement'] == df['actual_movement_dynamic']

# Compute performance metrics
true_values = df['actual_movement_dynamic'].dropna()
predicted_values = df['sentiment_movement'][df['actual_movement_dynamic'].notna()]

accuracy = accuracy_score(true_values, predicted_values)
precision = precision_score(true_values, predicted_values)
recall = recall_score(true_values, predicted_values)
f1 = f1_score(true_values, predicted_values)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Save results to CSV
df.to_csv("reddit_stock_analysis_dynamic.csv", index=False)
print("Results saved to reddit_stock_analysis_dynamic.csv")




It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Scraping data from r/stocks...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/wallstreetbets...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/investing...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/StockMarket...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Scraping data from r/finance...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Data scraped: 4786 rows
Rows with valid tickers: 4786


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

ERROR:yfinance:$ABNB: possibly delisted; no price data found  (1d 2020-11-12 -> 2020-11-19) (Yahoo error = "Data doesn't exist for startDate = 1605157200, endDate = 1605762000")
ERROR:yfinance:$DOW: possibly delisted; no price data found  (1d 2018-10-12 -> 2018-10-19) (Yahoo error = "Data doesn't exist for startDate = 1539316800, endDate = 1539921600")
ERROR:yfinance:$UBER: possibly delisted; no price data found  (1d 2019-03-30 -> 2019-04-06) (Yahoo error = "Data doesn't exist for startDate = 1553918400, endDate = 1554523200")
ERROR:yfinance:$UBER: possibly delisted; no price data found  (1d 2019-04-29 -> 2019-05-06) (Yahoo error = "Data doesn't exist for startDate = 1556510400, endDate = 1557115200")
ERROR:yfinance:$UBER: possibly delisted; no price data found  (1d 2017-11-10 -> 2017-11-17) (Yahoo error = "Data doesn't exist for startDate = 1510290000, endDate = 1510894800")
ERROR:yfinance:$ABNB: possibly delisted; no price data found  (1d 2020-04-23 -> 2020-04-30) (Yahoo error = "Dat

Accuracy: 0.42
Precision: 0.49
Recall: 0.09
F1 Score: 0.16
Results saved to reddit_stock_analysis_dynamic.csv
