## Installing Libraries

In [None]:


# Notebook shell command: install the third-party packages imported by the
# analysis cell below (pandas, nltk, textblob).
!pip install pandas nltk textblob



In [3]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the CSV paths defined in the analysis cell are relative file
# names, not paths under /content/drive — confirm the working directory.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from textblob import TextBlob
import os
import matplotlib.pyplot as plt
import nltk # Required for TextBlob dependencies

# --- 1. Configuration: Define File Paths ---
# Both paths are resolved relative to the current working directory.

PROCESSED_STOCK_FILE = 'processed_stock_analysis.csv'
NEWS_FILE = 'raw_news_ratings.csv'
# ----------------------------------------------------------------

# --- 2. Load and Align Data ---
print("--- 1. Data Alignment and Preparation ---")

try:
    # Load Stock Data (Ticker and Date are read as the index, then flattened
    # back into ordinary columns so they can be used as merge keys later).
    df_stock = pd.read_csv(PROCESSED_STOCK_FILE, index_col=['Ticker', 'Date'], parse_dates=['Date'])
    df_stock = df_stock.reset_index()

    # Load News Data
    df_news = pd.read_csv(NEWS_FILE)

except FileNotFoundError as e:
    # Chain the original error so the traceback still shows which file failed.
    raise RuntimeError(f"CRITICAL ERROR: File not found. Check path/name: {e}") from e

# Identify News Date Column ('Date' takes precedence over 'PublishDate').
if 'Date' in df_news.columns:
    date_col = 'Date'
elif 'PublishDate' in df_news.columns:
    date_col = 'PublishDate'
else:
    raise ValueError("ERROR: News file must contain a 'Date' or 'PublishDate' column.")

# Normalize Dates: both tables are later joined on 'Date', so both sides are
# truncated to midnight; otherwise a time-of-day on either side would prevent
# rows for the same calendar day from matching.
df_news[date_col] = pd.to_datetime(df_news[date_col]).dt.normalize()
df_news.rename(columns={date_col: 'Date'}, inplace=True)
df_stock['Date'] = pd.to_datetime(df_stock['Date']).dt.normalize()

print("Stock and News data loaded and dates normalized.")

# --- 3. Sentiment Analysis (TextBlob) ---
print("\n--- 2. Sentiment Analysis (TextBlob) ---")

# Sentiment is scored from the 'Headline' column, so stop early when the
# news table does not provide one (DataFrame membership tests its columns).
if 'Headline' not in df_news:
    raise ValueError("ERROR: News file must contain a 'Headline' column for sentiment analysis.")

def get_sentiment_polarity(text):
    """Return the TextBlob sentiment polarity of ``text``.

    Args:
        text: Headline to score. Non-string values are converted with ``str``.

    Returns:
        The polarity reported by TextBlob (-1 to +1), or 0.0 (neutral) when
        ``text`` is missing (``None`` / NaN) or TextBlob fails to score it.
    """
    # A missing headline carries no sentiment; return neutral directly rather
    # than scoring the literal strings "None" / "nan". `text != text` is the
    # NaN test for floats (including numpy floats, which subclass float).
    if text is None or (isinstance(text, float) and text != text):
        return 0.0
    try:
        return TextBlob(str(text)).sentiment.polarity
    except Exception:
        # Best effort: one unscorable headline should not abort the whole run.
        # Unlike a bare `except:`, this lets KeyboardInterrupt and SystemExit
        # propagate so the cell can still be interrupted.
        return 0.0

# Score each headline element-wise; one polarity value per news row.
df_news['Sentiment_Score'] = df_news['Headline'].map(get_sentiment_polarity)
print("Sentiment scores calculated for news headlines.")

# --- 4. Calculate Daily Stock Returns ---
print("\n--- 3. Calculate Daily Stock Returns ---")

def compute_daily_returns(group):
    """Add a 'Daily_Return' column holding the row-over-row change in 'Close'.

    Args:
        group: DataFrame with a 'Close' column. Returns are computed in the
            existing row order, so rows are expected to be in chronological
            order already.

    Returns:
        A new DataFrame equal to ``group`` plus 'Daily_Return' (fractional
        change, e.g. 0.1 for +10%); the first row is NaN. ``group`` itself is
        left unmodified.
    """
    # `assign` returns a copy: mutating the frame handed in by groupby.apply
    # is discouraged by pandas and can trigger SettingWithCopy warnings.
    return group.assign(Daily_Return=group['Close'].pct_change())

# Returns must be computed in chronological order within each ticker, so
# order the rows first (stable sort keeps the file order for exact ties).
df_stock = df_stock.sort_values(['Ticker', 'Date'], kind='stable')

# Per-ticker percentage change of 'Close', computed as a grouped column
# operation. This keeps 'Ticker' as an ordinary column: groupby(...).apply()
# on recent pandas versions also prepends 'Ticker' as an index level, which
# makes the later merge on 'Ticker' fail as ambiguous.
df_stock['Daily_Return'] = df_stock.groupby('Ticker')['Close'].pct_change()
print("Daily returns calculated for all stocks.")

# --- 5. Correlation Analysis ---
print("\n--- 4. Correlation Analysis ---")

# 5.1 Aggregate Sentiments (Average score per Ticker per Day)
# Without a 'Ticker' column the news is treated as market-wide: it is averaged
# per day only and joined to every stock on 'Date' alone.
if 'Ticker' not in df_news.columns:
    print("WARNING: News data lacks 'Ticker' column. Assuming news applies to all stocks.")
    merge_keys = ['Date']
else:
    merge_keys = ['Ticker', 'Date']

sentiment_agg = (
    df_news.groupby(merge_keys)['Sentiment_Score']
    .mean()
    .reset_index()
    .rename(columns={'Sentiment_Score': 'Avg_Daily_Sentiment'})
)

# 5.2 Merge DataFrames
# The join keys must match the columns sentiment_agg actually has; merging on
# 'Ticker' when the news has no such column raises KeyError.
df_merged = df_stock.merge(sentiment_agg, on=merge_keys, how='inner')

# Drop NaNs (e.g. the first return of each ticker, which has no previous close)
df_merged = df_merged.dropna(subset=['Daily_Return', 'Avg_Daily_Sentiment'])

print(f"Data points available for correlation after merge: {len(df_merged)}")

# 5.3 Calculate Pearson Correlation per Ticker
# Correlate the two columns by name for each ticker instead of picking a cell
# out of an unstacked correlation matrix by position, which depends on column
# ordering and can select the self-correlation (always 1.0).
pearson_by_ticker = {
    ticker: rows['Daily_Return'].corr(rows['Avg_Daily_Sentiment'])
    for ticker, rows in df_merged.groupby('Ticker')
}

# 5.4 Display Results (using print for console output)
# One-column frame indexed by 'Ticker'; the column name is set explicitly so
# later lookups by 'Correlation (Pearson r)' succeed.
correlation_results = (
    pd.Series(pearson_by_ticker, dtype=float, name='Correlation (Pearson r)')
    .rename_axis('Ticker')
    .to_frame()
)

print("\n--- Correlation Results: Sentiment vs. Daily Return ---")
# Use print for DataFrame output
try:
    print(correlation_results.to_markdown(floatfmt=".4f"))
except ImportError:
    # to_markdown needs the optional 'tabulate' package; fall back to plain
    # text with the same 4-decimal formatting when it is not installed.
    print(correlation_results.to_string(float_format="{:.4f}".format))

# --- 6. Optional Visualization of Correlation Scatter Plot (using Matplotlib) ---
print("\n--- 5. Optional Visualization: Scatter Plot of Sentiment vs. Returns ---")


# Plotting the data for the first ticker for a visual check
first_ticker = df_merged['Ticker'].iloc[0] if not df_merged.empty else None

# Compare against None explicitly so a falsy ticker label (e.g. 0 or '') is
# still plotted; None only means there was no merged data at all.
if first_ticker is not None:
    df_plot = df_merged[df_merged['Ticker'] == first_ticker]
    # Compute r from the plotted rows themselves so the title always matches
    # the points shown and does not depend on the layout of another table.
    r_val = df_plot['Daily_Return'].corr(df_plot['Avg_Daily_Sentiment'])

    plt.figure(figsize=(8, 5))
    # Returns are stored as fractions; scale by 100 to plot percentages.
    plt.scatter(df_plot['Avg_Daily_Sentiment'], df_plot['Daily_Return'] * 100, alpha=0.6)
    plt.title(f'Sentiment vs. Daily Return for {first_ticker} (r={r_val:.4f})')
    plt.xlabel('Average Daily Sentiment Score (Polarity: -1 to +1)')
    plt.ylabel('Daily Stock Return (%)')
    plt.grid(True)
    plt.show()

print("\n--- Execution Complete ---")