In [1]:
pip install yfinance

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install praw nltk pandas


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import nltk
nltk.download('vader_lexicon')


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Sebert\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [None]:
import yfinance as yf
import pandas as pd
import os
from datetime import datetime, timedelta
import praw
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Ensure the VADER lexicon is downloaded
nltk.download('vader_lexicon')

# --- Reddit API Credentials ---
REDDIT_CLIENT_ID = "VQ-NOvyPWyJvGZs1ifD0Ww"
REDDIT_CLIENT_SECRET = "BX_Dlp6miv2eMo4qt5JY_imgYVyMBA"
REDDIT_USER_AGENT = "StockSentimentAnalysis/0.1 by Joseph"

# Initialize the Reddit client (PRAW)
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT
)

def fetch_intraday_chunks(ticker, start_date, end_date, interval="1h", max_days=60):
    """
    Fetch intraday data in chunks for a given ticker from start_date to end_date,
    avoiding yfinance's ~60-day intraday limit by splitting the date range.
    
    Returns a single DataFrame for the entire period, with a single-level DatetimeIndex.
    """
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
    
    all_data = []
    current_start = start_dt

    while current_start < end_dt:
        current_end = current_start + timedelta(days=max_days)
        if current_end > end_dt:
            current_end = end_dt

        chunk_start_str = current_start.strftime("%Y-%m-%d")
        chunk_end_str = current_end.strftime("%Y-%m-%d")
        
        print(f"Fetching {ticker} data from {chunk_start_str} to {chunk_end_str} with {interval} interval...")
        chunk_data = yf.download(ticker, start=chunk_start_str, end=chunk_end_str, interval=interval)
        
        if not chunk_data.empty:
            all_data.append(chunk_data)
        else:
            print(f"No data returned for {ticker} from {chunk_start_str} to {chunk_end_str}.")
        
        current_start = current_end + timedelta(days=1)
    
    if all_data:
        full_data = pd.concat(all_data)
        full_data.sort_index(inplace=True)
        
        # Ensure index is a proper DatetimeIndex
        if not pd.api.types.is_datetime64_any_dtype(full_data.index):
            print("[DEBUG] Converting index to datetime...")
            full_data.index = pd.to_datetime(full_data.index, errors='coerce')
        
        # Flatten multi-level columns if needed
        if isinstance(full_data.columns, pd.MultiIndex):
            full_data.columns = [
                "_".join(col) if isinstance(col, tuple) else col
                for col in full_data.columns
            ]
        
        # Drop top level if we have a multi-level index (e.g., (ticker, datetime))
        if full_data.index.nlevels > 1:
            print("[DEBUG] Dropping the top index level...")
            full_data.index = full_data.index.droplevel(0)
        
        return full_data
    else:
        return pd.DataFrame()
    
def add_time_features(data):
    """
    Add time-based features to the DataFrame:
      - 'Hour': extracts the hour from the index.
      - 'HourlyCandleCount': counts the candle number within each day.
    """
    if not data.empty:
        # Extract hour from the DatetimeIndex
        data['Hour'] = data.index.hour
        # Count the hourly candle for each day (starting at 1)
        data['HourlyCandleCount'] = data.groupby(data.index.date).cumcount() + 1
    return data
    
def add_indicators(data, sma_windows=[20, 44, 50, 100, 200]):
    """
    Calculate technical indicators and add them as new columns to the DataFrame.
    
    Calculates:
      - SMA (Simple Moving Average) for each window in sma_windows.
      - MACD components (MACD line, Signal line, and Histogram).
      
    Parameters:
        data (DataFrame): DataFrame containing stock data.
        sma_windows (list): List of window periods for SMA calculation.
    
    Returns:
        DataFrame: DataFrame with additional indicator columns.
    """
    data.rename(columns=lambda x: "Close" if "Close" in x else x, inplace=True)
    if not data.empty and "Close" in data.columns:
        # Calculate SMAs for each specified window
        for window in sma_windows:
            data[f'SMA{window}'] = data['Close'].rolling(window=window).mean()
        
        # Calculate MACD components:
        # 12-period EMA and 26-period EMA
        data['EMA12'] = data['Close'].ewm(span=12, adjust=False).mean()
        data['EMA26'] = data['Close'].ewm(span=26, adjust=False).mean()
        # MACD Line
        data['MACD'] = data['EMA12'] - data['EMA26']
        # Signal Line: 9-period EMA of MACD
        data['MACD_Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()
        # Histogram: MACD - Signal Line
        data['MACD_Histogram'] = data['MACD'] - data['MACD_Signal']
        
        # Debug: print the columns to verify technical indicators are added
        print("[DEBUG] Technical indicator columns added:", data.columns.tolist())
    return data

def get_stocks_data(ticker_list, start_date, end_date, interval="1h", max_days=60):
    """
    Fetch historical intraday stock data for each ticker over a large date range by chunking.
    Returns a dictionary {ticker: DataFrame}.
    """
    stocks_data = {}
    for ticker in ticker_list:
        data = fetch_intraday_chunks(ticker, start_date, end_date, interval, max_days)
        # Add SMA and MACD technical indicators
        data = add_indicators(data, sma_windows=[20, 44, 50, 100, 200])
        # Add time features
        data = add_time_features(data)
        stocks_data[ticker] = data
    return stocks_data

def fetch_reddit_posts(ticker, limit=200, subreddit="wallstreetbets"):
    """
    Fetch up to 'limit' Reddit posts from a subreddit that mention the ticker.
    (PRAW doesn't allow date-based filtering, so we do a broad search and later
    aggregate by date in Python.)
    
    Returns: list of dict, each with {'created': datetime, 'text': ...}
    """
    print(f"Fetching up to {limit} Reddit posts for {ticker} in r/{subreddit}...")
    posts = []
    try:
        for submission in reddit.subreddit(subreddit).search(ticker, limit=limit):
            created_dt = pd.to_datetime(submission.created_utc, unit='s', utc=True)
            created_dt = created_dt.tz_localize(None)
            text = f"{submission.title} {submission.selftext}"
            posts.append({'created': created_dt, 'text': text})
    except Exception as e:
        print(f"Error fetching Reddit posts: {e}")
    print(f"Fetched {len(posts)} posts for {ticker}.")
    return posts

def analyze_sentiment(posts):
    """
    Perform VADER sentiment analysis on each post.
    Returns a DataFrame with columns ['created', 'compound'].
    """
    if not posts:
        return pd.DataFrame(columns=["created", "compound"])
    
    sid = SentimentIntensityAnalyzer()
    results = []
    for p in posts:
        scores = sid.polarity_scores(p['text'])
        results.append({
            'created': p['created'],
            'compound': scores['compound']
        })
    return pd.DataFrame(results)

def aggregate_sentiment_by_date(sentiment_df):
    """
    Convert each post's 'created' datetime to a date, then average the 'compound' score by date.
    Returns a DataFrame indexed by 'Date' with column 'avg_sentiment'.
    """
    if sentiment_df.empty:
        return pd.DataFrame(columns=["avg_sentiment"])
    
    sentiment_df['Date'] = sentiment_df['created'].dt.date
    grouped = sentiment_df.groupby('Date')['compound'].mean().reset_index()
    grouped.rename(columns={'compound': 'avg_sentiment'}, inplace=True)
    grouped.set_index('Date', inplace=True)
    return grouped

def merge_stock_with_sentiment(stock_df, ticker):
    """
    1) Store original intraday index in 'Datetime' column
    2) Create a 'Date' column from that intraday index
    3) Reset the index to a RangeIndex
    4) Fetch & analyze Reddit posts for 'ticker'
    5) Aggregate sentiment by date
    6) Merge on 'Date'
    7) Restore the original intraday index as row labels
    8) Return a DataFrame with 'avg_sentiment'
    """
    if stock_df.empty:
        return stock_df
    
    # 1) Store original intraday index in a new column
    stock_df['Datetime'] = stock_df.index
    
    # 2) Create a 'Date' column from that intraday index
    stock_df['Date'] = stock_df.index.date
    
    # 3) Reset the index
    stock_df.reset_index(drop=True, inplace=True)
    
    print(f"[DEBUG] {ticker} stock data shape: {stock_df.shape}, columns: {stock_df.columns.tolist()}")

    # 4) Fetch & analyze Reddit posts
    posts = fetch_reddit_posts(ticker, limit=200)
    sentiment_df = analyze_sentiment(posts)
    sentiment_daily = aggregate_sentiment_by_date(sentiment_df)
    
    print(f"[DEBUG] sentiment_daily shape: {sentiment_daily.shape}, columns: {sentiment_daily.columns.tolist()}")
    
    # 5) Merge on 'Date'
    merged_df = stock_df.merge(sentiment_daily, how='left', left_on='Date', right_on='Date')
    merged_df['avg_sentiment'].fillna(0, inplace=True)
    
    # 7) Restore the original intraday timestamps as row labels
    #    We'll set the index to 'Datetime'
    merged_df.set_index('Datetime', inplace=True)
    
    return merged_df

def save_stocks_to_csv(stocks_data, output_dir="."):
    """
    For each ticker:
      1) Merge the chunked stock data with Reddit sentiment
      2) Print the final merged DataFrame
      3) Save to CSV
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for ticker, data in stocks_data.items():
        if data.empty:
            print(f"No data for {ticker}; skipping sentiment merge and CSV save.")
            continue
        
        merged_df = merge_stock_with_sentiment(data, ticker)
        
        print(f"\n--- Final merged DataFrame for {ticker} ---")
        print(merged_df.info())
        print(merged_df.head(5))
        print(merged_df.tail(5))
        print(f"Data shape: {merged_df.shape}\n")
        
        file_path = os.path.join(output_dir, f"{ticker}_data_with_sentiment.csv")
        merged_df.to_csv(file_path)
        print(f"Saved {ticker} data with sentiment to {file_path}")

if __name__ == "__main__":
    tickers = ["AAPL", "GOOG", "MSFT", "AMZN", "NVDA"]
    start_date = "2023-01-01"
    end_date = "2025-02-10"
    
    stocks_data = get_stocks_data(tickers, start_date, end_date, interval="1h", max_days=60)
    save_stocks_to_csv(stocks_data, output_dir="stock_data")


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Sebert\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Fetching AAPL data from 2023-09-01 to 2023-10-31 with 1h interval...
Fetching AAPL data from 2023-11-01 to 2023-12-31 with 1h interval...
Fetching AAPL data from 2024-01-01 to 2024-03-01 with 1h interval...
Fetching AAPL data from 2024-03-02 to 2024-05-01 with 1h interval...
Fetching AAPL data from 2024-05-02 to 2024-07-01 with 1h interval...
Fetching AAPL data from 2024-07-02 to 2024-08-31 with 1h interval...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching AAPL data from 2024-09-01 to 2024-10-31 with 1h interval...
Fetching AAPL data from 2024-11-01 to 2024-12-31 with 1h interval...
Fetching AAPL data from 2025-01-01 to 2025-02-10 with 1h interval...
[DEBUG] Technical indicator columns added: ['Close', 'High_AAPL', 'Low_AAPL', 'Open_AAPL', 'Volume_AAPL', 'SMA20', 'SMA44', 'SMA50', 'SMA100', 'SMA200', 'EMA12', 'EMA26', 'MACD', 'MACD_Signal', 'MACD_Histogram']
Fetching GOOG data from 2023-09-01 to 2023-10-31 with 1h interval...
Fetching GOOG data from 2023-11-01 to 2023-12-31 with 1h interval...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching GOOG data from 2024-01-01 to 2024-03-01 with 1h interval...
Fetching GOOG data from 2024-03-02 to 2024-05-01 with 1h interval...
Fetching GOOG data from 2024-05-02 to 2024-07-01 with 1h interval...
Fetching GOOG data from 2024-07-02 to 2024-08-31 with 1h interval...
Fetching GOOG data from 2024-09-01 to 2024-10-31 with 1h interval...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Fetching GOOG data from 2024-11-01 to 2024-12-31 with 1h interval...
Fetching GOOG data from 2025-01-01 to 2025-02-10 with 1h interval...
[DEBUG] Technical indicator columns added: ['Close', 'High_GOOG', 'Low_GOOG', 'Open_GOOG', 'Volume_GOOG', 'SMA20', 'SMA44', 'SMA50', 'SMA100', 'SMA200', 'EMA12', 'EMA26', 'MACD', 'MACD_Signal', 'MACD_Histogram']
Fetching MSFT data from 2023-09-01 to 2023-10-31 with 1h interval...
Fetching MSFT data from 2023-11-01 to 2023-12-31 with 1h interval...
Fetching MSFT data from 2024-01-01 to 2024-03-01 with 1h interval...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching MSFT data from 2024-03-02 to 2024-05-01 with 1h interval...
Fetching MSFT data from 2024-05-02 to 2024-07-01 with 1h interval...
Fetching MSFT data from 2024-07-02 to 2024-08-31 with 1h interval...
Fetching MSFT data from 2024-09-01 to 2024-10-31 with 1h interval...
Fetching MSFT data from 2024-11-01 to 2024-12-31 with 1h interval...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching MSFT data from 2025-01-01 to 2025-02-10 with 1h interval...
[DEBUG] Technical indicator columns added: ['Close', 'High_MSFT', 'Low_MSFT', 'Open_MSFT', 'Volume_MSFT', 'SMA20', 'SMA44', 'SMA50', 'SMA100', 'SMA200', 'EMA12', 'EMA26', 'MACD', 'MACD_Signal', 'MACD_Histogram']
Fetching AMZN data from 2023-09-01 to 2023-10-31 with 1h interval...
Fetching AMZN data from 2023-11-01 to 2023-12-31 with 1h interval...
Fetching AMZN data from 2024-01-01 to 2024-03-01 with 1h interval...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching AMZN data from 2024-03-02 to 2024-05-01 with 1h interval...
Fetching AMZN data from 2024-05-02 to 2024-07-01 with 1h interval...
Fetching AMZN data from 2024-07-02 to 2024-08-31 with 1h interval...
Fetching AMZN data from 2024-09-01 to 2024-10-31 with 1h interval...
Fetching AMZN data from 2024-11-01 to 2024-12-31 with 1h interval...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching AMZN data from 2025-01-01 to 2025-02-10 with 1h interval...
[DEBUG] Technical indicator columns added: ['Close', 'High_AMZN', 'Low_AMZN', 'Open_AMZN', 'Volume_AMZN', 'SMA20', 'SMA44', 'SMA50', 'SMA100', 'SMA200', 'EMA12', 'EMA26', 'MACD', 'MACD_Signal', 'MACD_Histogram']
Fetching NVDA data from 2023-09-01 to 2023-10-31 with 1h interval...
Fetching NVDA data from 2023-11-01 to 2023-12-31 with 1h interval...
Fetching NVDA data from 2024-01-01 to 2024-03-01 with 1h interval...
Fetching NVDA data from 2024-03-02 to 2024-05-01 with 1h interval...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching NVDA data from 2024-05-02 to 2024-07-01 with 1h interval...
Fetching NVDA data from 2024-07-02 to 2024-08-31 with 1h interval...
Fetching NVDA data from 2024-09-01 to 2024-10-31 with 1h interval...
Fetching NVDA data from 2024-11-01 to 2024-12-31 with 1h interval...
Fetching NVDA data from 2025-01-01 to 2025-02-10 with 1h interval...





[DEBUG] Technical indicator columns added: ['Close', 'High_NVDA', 'Low_NVDA', 'Open_NVDA', 'Volume_NVDA', 'SMA20', 'SMA44', 'SMA50', 'SMA100', 'SMA200', 'EMA12', 'EMA26', 'MACD', 'MACD_Signal', 'MACD_Histogram']
[DEBUG] AAPL stock data shape: (2462, 17), columns: ['Close', 'High_AAPL', 'Low_AAPL', 'Open_AAPL', 'Volume_AAPL', 'SMA20', 'SMA44', 'SMA50', 'SMA100', 'SMA200', 'EMA12', 'EMA26', 'MACD', 'MACD_Signal', 'MACD_Histogram', 'Datetime', 'Date']
Fetching up to 200 Reddit posts for AAPL in r/wallstreetbets...
Fetched 200 posts for AAPL.
[DEBUG] sentiment_daily shape: (177, 1), columns: ['avg_sentiment']

--- Final merged DataFrame for AAPL ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2263 entries, 2023-10-12 16:30:00 to 2025-02-07 20:30:00
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Close           2263 non-null   float64
 1   High_AAPL       2263 non-null   float64
 2   Low_AAPL        22

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['avg_sentiment'].fillna(0, inplace=True)


Fetched 200 posts for GOOG.
[DEBUG] sentiment_daily shape: (163, 1), columns: ['avg_sentiment']

--- Final merged DataFrame for GOOG ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2263 entries, 2023-10-12 16:30:00 to 2025-02-07 20:30:00
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Close           2263 non-null   float64
 1   High_GOOG       2263 non-null   float64
 2   Low_GOOG        2263 non-null   float64
 3   Open_GOOG       2263 non-null   float64
 4   Volume_GOOG     2263 non-null   int64  
 5   SMA20           2263 non-null   float64
 6   SMA44           2263 non-null   float64
 7   SMA50           2263 non-null   float64
 8   SMA100          2263 non-null   float64
 9   SMA200          2263 non-null   float64
 10  EMA12           2263 non-null   float64
 11  EMA26           2263 non-null   float64
 12  MACD            2263 non-null   float64
 13  MACD_Signal     2263 non-null   float64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['avg_sentiment'].fillna(0, inplace=True)


Fetched 200 posts for MSFT.
[DEBUG] sentiment_daily shape: (169, 1), columns: ['avg_sentiment']

--- Final merged DataFrame for MSFT ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2263 entries, 2023-10-12 16:30:00 to 2025-02-07 20:30:00
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Close           2263 non-null   float64
 1   High_MSFT       2263 non-null   float64
 2   Low_MSFT        2263 non-null   float64
 3   Open_MSFT       2263 non-null   float64
 4   Volume_MSFT     2263 non-null   int64  
 5   SMA20           2263 non-null   float64
 6   SMA44           2263 non-null   float64
 7   SMA50           2263 non-null   float64
 8   SMA100          2263 non-null   float64
 9   SMA200          2263 non-null   float64
 10  EMA12           2263 non-null   float64
 11  EMA26           2263 non-null   float64
 12  MACD            2263 non-null   float64
 13  MACD_Signal     2263 non-null   float64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['avg_sentiment'].fillna(0, inplace=True)


Fetched 200 posts for AMZN.
[DEBUG] sentiment_daily shape: (174, 1), columns: ['avg_sentiment']

--- Final merged DataFrame for AMZN ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2263 entries, 2023-10-12 16:30:00 to 2025-02-07 20:30:00
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Close           2263 non-null   float64
 1   High_AMZN       2263 non-null   float64
 2   Low_AMZN        2263 non-null   float64
 3   Open_AMZN       2263 non-null   float64
 4   Volume_AMZN     2263 non-null   int64  
 5   SMA20           2263 non-null   float64
 6   SMA44           2263 non-null   float64
 7   SMA50           2263 non-null   float64
 8   SMA100          2263 non-null   float64
 9   SMA200          2263 non-null   float64
 10  EMA12           2263 non-null   float64
 11  EMA26           2263 non-null   float64
 12  MACD            2263 non-null   float64
 13  MACD_Signal     2263 non-null   float64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['avg_sentiment'].fillna(0, inplace=True)


Fetched 200 posts for NVDA.
[DEBUG] sentiment_daily shape: (145, 1), columns: ['avg_sentiment']

--- Final merged DataFrame for NVDA ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2263 entries, 2023-10-12 16:30:00 to 2025-02-07 20:30:00
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Close           2263 non-null   float64
 1   High_NVDA       2263 non-null   float64
 2   Low_NVDA        2263 non-null   float64
 3   Open_NVDA       2263 non-null   float64
 4   Volume_NVDA     2263 non-null   int64  
 5   SMA20           2263 non-null   float64
 6   SMA44           2263 non-null   float64
 7   SMA50           2263 non-null   float64
 8   SMA100          2263 non-null   float64
 9   SMA200          2263 non-null   float64
 10  EMA12           2263 non-null   float64
 11  EMA26           2263 non-null   float64
 12  MACD            2263 non-null   float64
 13  MACD_Signal     2263 non-null   float64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['avg_sentiment'].fillna(0, inplace=True)
