In [0]:
# 📦 Install TextBlob Library
%pip install textblob


Collecting textblob
  Downloading textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Collecting nltk>=3.9 (from textblob)
  Downloading nltk-3.9.2-py3-none-any.whl.metadata (3.2 kB)
Collecting regex>=2021.8.3 (from nltk>=3.9->textblob)
  Downloading regex-2025.11.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
Collecting tqdm (from nltk>=3.9->textblob)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Downloading textblob-0.19.0-py3-none-any.whl (624 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/624.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m624.3/624.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nltk-3.9.2-py3-none-any.whl (1.5 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [

In [0]:
# ============================================================
# 💬 Step 2: Sentiment Analysis (VADER version) — MarketMind
# ============================================================

import pandas as pd
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

print("🚀 Starting Step 2: Sentiment Analysis...")

# ---------- Ensure VADER Lexicon is Available ----------
# Look for the lexicon in the local NLTK data directories first and only go to
# the network when it is missing.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    print("📦 Downloading 'vader_lexicon' for NLTK...")
    # nltk.download() signals failure through its boolean return value instead
    # of raising, so check it and stop here with an explicit error rather than
    # failing later when the analyzer is constructed.
    if not nltk.download('vader_lexicon'):
        raise LookupError(
            "❌ Could not download the NLTK 'vader_lexicon' resource — "
            "check network access or install the lexicon manually."
        ) from None

# ---------- Paths ----------
# The ingested CSV is read from, and the sentiment CSV written to, the same
# outputs folder.
# NOTE(review): these are absolute, user-specific workspace paths — consider
# making them configurable.
output_dir = "/Workspace/Users/abhishekgantana1@gmail.com/marketmind/outputs"
input_path = "/Workspace/Users/abhishekgantana1@gmail.com/marketmind/outputs/ingested_data.csv"
output_path = os.path.join(output_dir, "sentiment_data.csv")

# ---------- File Check ----------
# Guard clause: stop immediately with an explicit error if the input is absent.
input_found = os.path.exists(input_path)
if not input_found:
    raise FileNotFoundError(f"❌ Missing file: {input_path}")
print(f"✅ Input file found: {input_path}")

# ---------- Load Data ----------
df = pd.read_csv(input_path)
row_count, column_count = df.shape
print(f"✅ Loaded data: {row_count} rows, {column_count} columns")

# ---------- Detect Text-Like Column ----------
# Treat both the classic object dtype and pandas' dedicated string dtypes as
# text-like, so detection does not depend on how the CSV reader stores strings.
text_columns = [
    col for col in df.columns
    if pd.api.types.is_object_dtype(df[col].dtype)
    or pd.api.types.is_string_dtype(df[col].dtype)
]
# The first preferred name found among the text-like columns wins; None if
# none of them is present.
text_column = next(
    (
        candidate
        for candidate in ['text', 'headline', 'review', 'description', 'comment']
        if candidate in text_columns
    ),
    None,
)


# ---------- Sentiment Analysis ----------
def label_sentiment(score):
    """Map a polarity score to 'Positive', 'Negative' or 'Neutral'.

    Scores above 0.05 are 'Positive' and scores below -0.05 are 'Negative'.
    Everything else is 'Neutral', including NaN, for which both comparisons
    evaluate to False.
    """
    if score > 0.05:
        return 'Positive'
    if score < -0.05:
        return 'Negative'
    return 'Neutral'


sia = SentimentIntensityAnalyzer()

if text_column:
    print(f"🧠 Detected text column: '{text_column}' — running VADER sentiment analysis...")
    # str() lets non-string cells (e.g. missing values) pass through the analyzer.
    df['sentiment_score'] = df[text_column].apply(
        lambda x: sia.polarity_scores(str(x))['compound']
    )
    df['sentiment_label'] = df['sentiment_score'].apply(label_sentiment)
    print("✅ Sentiment analysis completed successfully using VADER.")
elif 'sentiment_score' in df.columns:
    # The input already carries a sentiment score. Overwriting it with 0 would
    # silently destroy that data, so keep it untouched and only derive labels.
    # NOTE(review): assumes the existing score is on a -1..1 polarity scale like
    # VADER's compound score — TODO confirm against the ingestion step.
    print("ℹ️ No suitable text column found — keeping existing 'sentiment_score' and deriving labels from it.")
    # Coerce on a temporary copy so non-numeric cells are labelled 'Neutral'
    # without changing the stored column.
    existing_scores = pd.to_numeric(df['sentiment_score'], errors='coerce')
    df['sentiment_label'] = existing_scores.apply(label_sentiment)
    print("✅ Sentiment labels derived from existing scores (VADER not run).")
else:
    print("⚠️ No suitable text column found — assigning neutral sentiment (0).")
    df['sentiment_score'] = 0
    df['sentiment_label'] = 'Neutral'
    print("✅ Sentiment columns filled with neutral defaults (VADER not run).")

# ---------- Save Output ----------
# Create the destination folder when it does not exist yet, then write the
# frame as CSV without the index column.
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)
print(f"💾 Sentiment data saved at: {output_path}")

# ---------- Exit Gracefully ----------
# NOTE(review): `dbutils` is not imported in this notebook — presumably it is
# injected by the Databricks runtime; confirm before running elsewhere.
exit_message = "✅ Step 2 completed successfully — Sentiment Analysis Done."
dbutils.notebook.exit(exit_message)


In [0]:
# Show the frame's column names as a plain Python list for a quick schema check.
df.columns.to_list()


['date',
 'company',
 'ticker',
 'product_category',
 'region',
 'sales_volume',
 'revenue',
 'price',
 'marketing_spend',
 'discount_rate',
 'inventory_level',
 'promotion_flag',
 'holiday_flag',
 'sentiment_score',
 'positive_mentions',
 'negative_mentions',
 'neutral_mentions',
 'news_volume',
 'social_buzz_index',
 'open_price',
 'close_price',
 'high_price',
 'low_price',
 'volume',
 'sector_index',
 'market_return',
 'sales_lag_7',
 'sales_rolling_mean_30',
 'sentiment_lag_1',
 'sentiment_rolling_mean_7',
 'stock_pct_change',
 'volatility_index',
 'marketing_growth_rate',
 'data_ingested_timestamp']