In [15]:
# --- Imports for Task 2 ---
import pandas as pd
import talib
import matplotlib.pyplot as plt
import seaborn as sns
import os
import logging
from financial_news import NewsDataLoader  # From Task 1


In [17]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename='task2_analysis.log', filemode='a')
logger = logging.getLogger(__name__)
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logger.addHandler(console)

# Stock symbols
stock_symbols = ['TSLA', 'NVDA', 'META', 'AMZN', 'GOOG', 'AAPL', 'MSFT']

# Directories
DATA_DIR = os.path.abspath('../data')
PLOTS_DIR = os.path.abspath('../Plots-task2')
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PLOTS_DIR, exist_ok=True)

# Set plotting style
sns.set_theme(style="whitegrid")
logger.info("Starting Task 2: Technical Analysis for %s", ', '.join(stock_symbols))

Starting Task 2: Technical Analysis for TSLA, NVDA, META, AMZN, GOOG, AAPL, MSFT
Starting Task 2: Technical Analysis for TSLA, NVDA, META, AMZN, GOOG, AAPL, MSFT
Starting Task 2: Technical Analysis for TSLA, NVDA, META, AMZN, GOOG, AAPL, MSFT


In [18]:
# Load and prepare stock data
def load_stock_data(symbol, data_dir):
    csv_path = os.path.join(data_dir, f"{symbol}_historical_data.csv")
    logger.info("Loading %s from %s", symbol, csv_path)
    try:
        df = pd.read_csv(csv_path)
        if df.empty:
            logger.warning("%s is empty", csv_path)
            return None

        # Log columns for debugging
        logger.info("Columns in %s: %s", symbol, df.columns.tolist())

        # Date processing
        date_cols = ['Date', 'date', 'Datetime', 'datetime', 'Timestamp', 'timestamp']
        date_col = next((col for col in date_cols if col in df.columns), None)
        if not date_col:
            logger.error("No date column in %s", csv_path)
            return None

        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
        df = df.dropna(subset=[date_col])
        df.set_index(date_col, inplace=True)

        # OHLCV column mapping
        col_map = {}
        for col in df.columns:
            col_lower = col.lower().replace(' ', '_')
            if col_lower in ['open', 'open_price']:
                col_map[col] = 'Open'
            elif col_lower in ['high', 'high_price']:
                col_map[col] = 'High'
            elif col_lower in ['low', 'low_price']:
                col_map[col] = 'Low'
            elif col_lower in ['close', 'close_price', 'adj_close', 'adjusted_close']:
                col_map[col] = 'Close'
            elif col_lower in ['volume']:
                col_map[col] = 'Volume'

        df.rename(columns=col_map, inplace=True)
        required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            logger.error("Missing columns in %s: %s", symbol, missing_cols)
            return None

        df = df[required_cols].copy()
        df = df.apply(pd.to_numeric, errors='coerce').ffill().bfill().dropna()
        if df.empty:
            logger.warning("No valid data after cleaning for %s", symbol)
            return None

        # Save compressed .pkl
        pkl_path = os.path.join(data_dir, f"{symbol}_processed.pkl")
        df.to_pickle(pkl_path, compression='gzip')
        logger.info("Saved processed data to %s", pkl_path)
        return df
    except FileNotFoundError:
        logger.error("%s not found", csv_path)
        return None
    except Exception as e:
        logger.error("Error loading %s: %s", symbol, e)
        return None

In [19]:
# Load news data (Task 3 preparation)
def load_news_data(data_dir):
    csv_path = os.path.join(data_dir, 'raw_analyst_ratings.csv')
    logger.info("Loading news from %s", csv_path)
    try:
        news_loader = NewsDataLoader(csv_path)
        news_df = news_loader.load()
        news_df['date'] = pd.to_datetime(news_df['date'], utc=True, errors='coerce').dt.tz_convert('UTC-4')
        news_df = news_df.dropna(subset=['date'])
        pkl_path = os.path.join(data_dir, 'processed_news_data.pkl')
        news_df.to_pickle(pkl_path, compression='gzip')
        logger.info("Saved processed news data to %s", pkl_path)
        return news_df
    except Exception as e:
        logger.error("Error loading news data: %s", e)
        return None

In [20]:
# Calculate indicators and plot
all_stocks_data = {}
for symbol in stock_symbols:
    df = load_stock_data(symbol, DATA_DIR)
    if df is not None:
        all_stocks_data[symbol] = df

if not all_stocks_data:
    logger.error("No stock data loaded. Exiting.")
    exit(1)

logger.info("Loaded data for %d stocks: %s", len(all_stocks_data), list(all_stocks_data.keys()))

Loading TSLA from d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\TSLA_historical_data.csv
Loading TSLA from d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\TSLA_historical_data.csv
Loading TSLA from d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\TSLA_historical_data.csv
Columns in TSLA: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
Columns in TSLA: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
Columns in TSLA: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
Saved processed data to d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\TSLA_processed.pkl
Saved processed data to d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\TSLA_processed.pkl
Saved processed data to d:\Documents\Projects\10 Academy\Stock Market\financial-

In [21]:
# Load news data
news_df = load_news_data(DATA_DIR)

for symbol, df in all_stocks_data.items():
    logger.info("Processing %s", symbol)
    try:
        # Calculate indicators
        df['SMA_20'] = talib.SMA(df['Close'], timeperiod=20)
        df['RSI_14'] = talib.RSI(df['Close'], timeperiod=14)
        macd, signal, hist = talib.MACD(df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        df['MACD'] = macd
        df['MACD_Signal'] = signal
        df['MACD_Hist'] = hist

        # Save indicators
        pkl_path = os.path.join(DATA_DIR, f"{symbol}_indicators.pkl")
        df.to_pickle(pkl_path, compression='gzip')
        logger.info("Saved indicators to %s", pkl_path)

        # Plot SMA
        plt.figure(figsize=(10, 5))
        plt.plot(df.index, df['Close'], label='Close', color='blue')
        plt.plot(df.index, df['SMA_20'], label='SMA 20', color='orange', ls='--')
        plt.title(f'{symbol} Price & SMA')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.legend()
        plt.tight_layout()
        plt.savefig(os.path.join(PLOTS_DIR, f'{symbol}_sma.png'))
        plt.close()

        # Plot RSI
        if not df['RSI_14'].isnull().all():
            plt.figure(figsize=(10, 4))
            plt.plot(df.index, df['RSI_14'], label='RSI 14', color='purple')
            plt.axhline(70, color='red', ls='--', alpha=0.5)
            plt.axhline(30, color='green', ls='--', alpha=0.5)
            plt.title(f'{symbol} RSI')
            plt.xlabel('Date')
            plt.ylabel('RSI')
            plt.ylim(0, 100)
            plt.legend()
            plt.tight_layout()
            plt.savefig(os.path.join(PLOTS_DIR, f'{symbol}_rsi.png'))
            plt.close()

        # Plot MACD
        if not df['MACD'].isnull().all():
            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
            ax1.plot(df.index, df['MACD'], label='MACD', color='blue')
            ax1.plot(df.index, df['MACD_Signal'], label='Signal', color='red', ls='--')
            ax1.legend()
            ax2.bar(df.index, df['MACD_Hist'], color=['green' if v >= 0 else 'red' for v in df['MACD_Hist']], alpha=0.7)
            ax2.axhline(0, color='grey', ls='--')
            ax2.set_xlabel('Date')
            fig.suptitle(f'{symbol} MACD')
            plt.tight_layout()
            plt.savefig(os.path.join(PLOTS_DIR, f'{symbol}_macd.png'))
            plt.close()
    except Exception as e:
        logger.error("Error processing %s: %s", symbol, e)
        continue

Loading news from d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\raw_analyst_ratings.csv
Loading news from d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\raw_analyst_ratings.csv
Loading news from d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\raw_analyst_ratings.csv


DEBUG: [NewsDataLoader.load] Attempting to load: d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data\raw_analyst_ratings.csv
DEBUG: [NewsDataLoader.load] CSV loaded. Columns: ['Unnamed: 0', 'headline', 'url', 'publisher', 'date', 'stock']
DEBUG: [NewsDataLoader.load] Initial 'date' column dtype: object
DEBUG: [NewsDataLoader.load] First 5 'date' values (raw): ['2020-06-05 10:30:54-04:00', '2020-06-03 10:45:20-04:00', '2020-05-26 04:30:07-04:00', '2020-05-22 12:45:06-04:00', '2020-05-22 11:38:59-04:00']
DEBUG: [NewsDataLoader.load] Attempting explicit pd.to_datetime on 'date' column.
DEBUG: [NewsDataLoader.load] After explicit pd.to_datetime, 'date' column dtype: datetime64[ns, UTC-04:00]
DEBUG: [NewsDataLoader.load] First 5 'date' values (after parse): [Timestamp('2020-06-05 10:30:54-0400', tz='UTC-04:00'), Timestamp('2020-06-03 10:45:20-0400', tz='UTC-04:00'), Timestamp('2020-05-26 04:30:07-0400', tz='UTC-04:00'), Timestamp('2020-05-22 12:45:06-0400', tz='UTC-04

Error loading news data: invalid literal for int() with base 10: '-'
Error loading news data: invalid literal for int() with base 10: '-'
Error loading news data: invalid literal for int() with base 10: '-'
Processing TSLA
Processing TSLA
Processing TSLA
Error processing TSLA: Argument 'real' has incorrect type (expected numpy.ndarray, got DataFrame)
Error processing TSLA: Argument 'real' has incorrect type (expected numpy.ndarray, got DataFrame)
Error processing TSLA: Argument 'real' has incorrect type (expected numpy.ndarray, got DataFrame)
Processing NVDA
Processing NVDA
Processing NVDA
Error processing NVDA: Argument 'real' has incorrect type (expected numpy.ndarray, got DataFrame)
Error processing NVDA: Argument 'real' has incorrect type (expected numpy.ndarray, got DataFrame)
Error processing NVDA: Argument 'real' has incorrect type (expected numpy.ndarray, got DataFrame)
Processing META
Processing META
Processing META
Error processing META: Argument 'real' has incorrect type (ex

In [22]:
# Task 3 preparation
if news_df is not None and all_stocks_data:
    logger.info("Preparing for Task 3 correlation analysis")
    for symbol in all_stocks_data:
        df = all_stocks_data[symbol]
        df['Daily_Return'] = df['Close'].pct_change()
        logger.info("Computed daily returns for %s. Awaiting sentiment analysis.", symbol)

logger.info("Task 2 completed. Plots in %s", PLOTS_DIR)

Task 2 completed. Plots in d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\Plots-task2
Task 2 completed. Plots in d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\Plots-task2
Task 2 completed. Plots in d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\Plots-task2
