In [10]:
import pandas as pd
import yfinance as yf # For fetching stock data
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np

In [11]:

# --- 1. Load Your News Data (to get stock tickers and date ranges) ---
def compute_fetch_window(dates, lookback_months=3, fallback=("2022-01-01", "2023-12-31")):
    """Derive the price-download window from a collection of news dates.

    Parameters
    ----------
    dates : array-like
        Date-like values; anything unparseable is coerced to NaT and ignored.
    lookback_months : int, default 3
        Months subtracted from the earliest date so that rolling indicators
        (moving averages, etc.) have prior data to warm up on.
    fallback : tuple of (str, str)
        ``(start, end)`` returned when no usable date is present.

    Returns
    -------
    tuple of (str, str)
        ``(start_date, end_date)`` as ``'YYYY-MM-DD'`` strings. ``end_date`` is
        one day past the latest news date so that the latest date itself is
        included by an end-exclusive download.
    """
    valid_dates = pd.to_datetime(pd.Series(dates), errors='coerce').dropna()
    # A column full of NaT is non-empty but has no usable min/max
    # (NaT.strftime raises), so test the *valid* dates rather than the length.
    if valid_dates.empty:
        return tuple(fallback)
    # Reduce to calendar dates first, mirroring a format/re-parse round trip.
    first_day = pd.Timestamp(valid_dates.min().date())
    last_day = pd.Timestamp(valid_dates.max().date())
    window_start = first_day - pd.DateOffset(months=lookback_months)
    window_end = last_day + pd.DateOffset(days=1)
    return window_start.strftime('%Y-%m-%d'), window_end.strftime('%Y-%m-%d')


try:
    news_df = pd.read_csv('../data/raw/financial_news.csv', parse_dates=['date'])
    # NOTE: if yfinance has trouble with timezone-aware dates, strip the
    # timezone here (e.g. .dt.tz_localize(None)) or align it later.
except FileNotFoundError:
    print("Using DUMMY news_df for ticker list and date range.")
    _data = {
        'date': pd.to_datetime(["2023-01-01", "2023-10-30"]), # Example date range
        'stock': ["AAPL", "MSFT"] # Example tickers
    }
    news_df = pd.DataFrame(_data)

# Unique stock symbols; rows with a missing ticker are skipped so that NaN can
# never end up being used as a symbol further down.
unique_tickers = news_df['stock'].dropna().unique()

# Ensure 'date' column is datetime (unparseable values become NaT)
news_df['date'] = pd.to_datetime(news_df['date'], errors='coerce')

# min_date / max_date are only defined when at least one valid date exists.
_valid_news_dates = news_df['date'].dropna()
if not _valid_news_dates.empty:
    min_date = _valid_news_dates.min().strftime('%Y-%m-%d')
    max_date = _valid_news_dates.max().strftime('%Y-%m-%d')

# Overall window: 3 months of lead-in for indicators, +1 day to include the
# last news date; falls back to a fixed range if dates are missing.
start_date, end_date = compute_fetch_window(_valid_news_dates)

print(f"Tickers from news: {unique_tickers}")
print(f"Fetching stock data from {start_date} to {end_date}")


Tickers from news: ['A' 'AA' 'AAC' ... 'ZU' 'ZUMZ' 'ZX']
Fetching stock data from 2011-01-27 to 2020-06-12


In [None]:
# --- 2. Load Stock Price Data using yfinance ---
# We'll fetch data for one stock for demonstration. You might loop or use yf.download for multiple.
DUMMY_PRICE_SEED = 42  # fixed seed so the fallback data is identical on every run

if len(unique_tickers) == 0:
    print("No stock tickers found in news data. Using 'AAPL' as an example.")
    example_ticker = "AAPL"
else:
    example_ticker = unique_tickers[0] # Take the first ticker for this example

print(f"\nFetching data for: {example_ticker}")
try:
    stock_data = yf.download(example_ticker, start=start_date, end=end_date)
    if stock_data.empty:
        raise ValueError(f"No data found for {example_ticker}")
    # Recent yfinance releases return MultiIndex columns (field, ticker) even
    # for a single symbol, which makes stock_data['Close'] a DataFrame rather
    # than a Series. Collapse to the level that holds the field names.
    if isinstance(stock_data.columns, pd.MultiIndex):
        _field_level = next(
            (lvl for lvl in range(stock_data.columns.nlevels)
             if 'Close' in stock_data.columns.get_level_values(lvl)),
            0,
        )
        stock_data.columns = stock_data.columns.get_level_values(_field_level)
    print(f"\n{example_ticker} Data Head:")
    print(stock_data.head())
except Exception as e:
    # Deliberate best-effort: any failure (network, delisted symbol, empty
    # result) falls back to synthetic prices so the rest of the notebook runs.
    # WARNING: everything downstream is then computed on random data, not on
    # real prices for example_ticker.
    print(f"Error fetching data for {example_ticker}: {e}")
    dummy_rng = np.random.default_rng(DUMMY_PRICE_SEED)
    idx = pd.date_range(start_date, end_date, freq='B') # Business days
    n_days = len(idx)
    dummy_prices = pd.Series(dummy_rng.uniform(100, 200, n_days), index=idx).round(2)
    stock_data = pd.DataFrame({
        # Open stays within +/-1 of Close, while High >= Close+1 and
        # Low <= Close-1, so every row satisfies Low <= Open/Close <= High.
        'Open': dummy_prices + dummy_rng.uniform(-1, 1, n_days),
        'High': dummy_prices + dummy_rng.uniform(1, 3, n_days),
        'Low': dummy_prices - dummy_rng.uniform(1, 3, n_days),
        'Close': dummy_prices,
        'Adj Close': dummy_prices,
        'Volume': dummy_rng.integers(100000, 5000000, size=n_days)
    })
    stock_data.index.name = 'Date'
    print(f"Using DUMMY data for {example_ticker}.")

In [None]:
import talib

# --- 3. Apply Analysis Indicators with TA-Lib ---
# Adds SMA(20), EMA(50), RSI(14) and MACD(12, 26, 9) columns to stock_data,
# all computed from the 'Close' column.
if not stock_data.empty and 'Close' in stock_data.columns:
    print("\nCalculating Technical Indicators...")

    # TA-Lib needs a one-dimensional float64 input. With MultiIndex columns
    # stock_data['Close'] is a one-column DataFrame, and integer/float32
    # prices are rejected too, so normalise once up front.
    close_prices = stock_data['Close']
    if isinstance(close_prices, pd.DataFrame):
        close_prices = close_prices.iloc[:, 0]
    close_prices = close_prices.astype('float64')

    # Moving Averages
    stock_data['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
    stock_data['EMA_50'] = talib.EMA(close_prices, timeperiod=50)

    # RSI (Relative Strength Index)
    stock_data['RSI_14'] = talib.RSI(close_prices, timeperiod=14)

    # MACD (Moving Average Convergence Divergence)
    macd, macdsignal, macdhist = talib.MACD(close_prices, fastperiod=12, slowperiod=26, signalperiod=9)
    stock_data['MACD'] = macd
    stock_data['MACD_signal'] = macdsignal
    stock_data['MACD_hist'] = macdhist

    print("\nStock Data with Technical Indicators (Tail):")
    print(stock_data.tail())
else:
    print("Skipping TA-Lib indicators as stock_data is empty or lacks 'Close' column.")

In [None]:
# --- 4. Visualize the Data ---
# Three stacked panels sharing the date axis: price with moving averages,
# RSI with its 70/30 reference lines, and MACD with signal line and histogram.
if stock_data.empty or 'Close' not in stock_data.columns:
    print("Skipping visualization as stock_data is empty or lacks 'Close' column.")
else:
    print("\nVisualizing Data...")
    fig, axes = plt.subplots(3, 1, figsize=(15, 18), sharex=True)
    price_ax, rsi_ax, macd_ax = axes
    plot_dates = stock_data.index

    # Panel 1: closing price, plus whichever moving averages were computed
    price_ax.plot(plot_dates, stock_data['Close'], label='Close Price', color='blue', alpha=0.7)
    for ma_column, ma_label, ma_color in (
        ('SMA_20', '20-Day SMA', 'orange'),
        ('EMA_50', '50-Day EMA', 'red'),
    ):
        if ma_column in stock_data:
            price_ax.plot(plot_dates, stock_data[ma_column], label=ma_label, color=ma_color, linestyle='--')
    price_ax.set(title=f'{example_ticker} Close Price and Moving Averages', ylabel='Price')
    price_ax.legend()
    price_ax.grid(True)

    # Panel 2: RSI, or a placeholder message when the column is absent
    if 'RSI_14' not in stock_data:
        rsi_ax.text(0.5, 0.5, 'RSI data not available', ha='center', va='center', transform=rsi_ax.transAxes)
    else:
        rsi_ax.plot(plot_dates, stock_data['RSI_14'], label='RSI (14)', color='purple')
        for rsi_level, level_color, level_label in (
            (70, 'red', 'Overbought (70)'),
            (30, 'green', 'Oversold (30)'),
        ):
            rsi_ax.axhline(rsi_level, color=level_color, linestyle='--', lw=1, label=level_label)
        rsi_ax.set(title=f'{example_ticker} Relative Strength Index (RSI)', ylabel='RSI')
        rsi_ax.legend()
        rsi_ax.grid(True)

    # Panel 3: MACD line, signal line and histogram, or a placeholder message
    if 'MACD' not in stock_data:
        macd_ax.text(0.5, 0.5, 'MACD data not available', ha='center', va='center', transform=macd_ax.transAxes)
    else:
        macd_ax.plot(plot_dates, stock_data['MACD'], label='MACD', color='green')
        macd_ax.plot(plot_dates, stock_data['MACD_signal'], label='Signal Line', color='red', linestyle='--')
        macd_ax.bar(plot_dates, stock_data['MACD_hist'], label='Histogram', color='gray', alpha=0.5)
        macd_ax.set(title=f'{example_ticker} MACD', ylabel='MACD Value', xlabel='Date')
        macd_ax.legend()
        macd_ax.grid(True)

    plt.tight_layout()
    plt.show()