#### Import and Setup

In [1]:
# Quantitative Analysis - Technical Indicators
# KAIM Week 1 - Task 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

print("✓ Libraries imported successfully!")
print(f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}")

✓ Libraries imported successfully!
Analysis Date: 2025-11-21 20:52


#### Load News Data & Identify Stocks

In [3]:
# Load news data to identify which stocks to analyze
news_df = pd.read_csv('../data/newsData/raw_analyst_ratings.csv')  

# Get date range from news data
news_df['date'] = pd.to_datetime(news_df['date'], errors='coerce')
news_start = news_df['date'].min()
news_end = news_df['date'].max()

print("=" * 50)
print("NEWS DATA SUMMARY")
print("=" * 50)
print(f"Date range: {news_start.date()} to {news_end.date()}")
print(f"Total articles: {len(news_df)}")
print(f"Unique stocks: {news_df['stock'].nunique()}")

# Top 10 stocks by article count
top_stocks = news_df['stock'].value_counts().head(10)
print(f"\nTop 10 Stocks by News Coverage:")
print(top_stocks)

NEWS DATA SUMMARY
Date range: 2011-04-27 to 2020-06-11
Total articles: 1407328
Unique stocks: 6204

Top 10 Stocks by News Coverage:
stock
MRK     3333
MS      3238
NVDA    3146
MU      3142
QQQ     3106
NFLX    3028
M       3025
EBAY    3018
GILD    2968
VZ      2966
Name: count, dtype: int64


#### Download Stock Price data

In [4]:
# Select top 6 stocks for analysis
symbols = news_df['stock'].value_counts().head(6).index.tolist()
print(f"Analyzing stocks: {symbols}")

# Set date range (extend slightly beyond news data)
start_date = (news_start - timedelta(days=60)).strftime('%Y-%m-%d')
end_date = (news_end + timedelta(days=30)).strftime('%Y-%m-%d')

print(f"\nDownloading data from {start_date} to {end_date}...")

# Download stock data
stock_data = {}
for symbol in symbols:
    try:
        ticker = yf.Ticker(symbol)
        df = ticker.history(start=start_date, end=end_date)
        if len(df) > 100:  # At least 100 days of data
            stock_data[symbol] = df
            print(f"  ✓ {symbol}: {len(df)} trading days")
        else:
            print(f"  ✗ {symbol}: Insufficient data ({len(df)} days)")
    except Exception as e:
        print(f"  ✗ {symbol}: Error - {str(e)[:50]}")

print(f"\n✓ Successfully loaded {len(stock_data)} stocks")

Analyzing stocks: ['MRK', 'MS', 'NVDA', 'MU', 'QQQ', 'NFLX']

Downloading data from 2011-02-26 to 2020-07-11...
  ✓ MRK: 2358 trading days
  ✓ MS: 2358 trading days
  ✓ NVDA: 2358 trading days
  ✓ MU: 2358 trading days
  ✓ QQQ: 2358 trading days
  ✓ NFLX: 2358 trading days

✓ Successfully loaded 6 stocks
