In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import talib
import pynance as pn

# Print current working directory for debugging: every relative path used in
# this script is resolved against it.
print("Current Working Directory:", os.getcwd())

# --- 1. Load and Prepare Data ---
stock_symbol = 'AAPL'  # We'll start with AAPL; you can loop through others later
file_path = f'../data/yfinance_data/{stock_symbol}_historical_data.csv'

# Keep the try body down to the one call that can raise FileNotFoundError and
# re-raise with the resolved path so the failure is actionable.
try:
    data = pd.read_csv(file_path)
except FileNotFoundError as err:
    raise FileNotFoundError(f"Stock data file not found: {file_path}") from err
else:
    print(f"Loaded {stock_symbol} data successfully. Shape:", data.shape)

# Ensure required columns are present, and say exactly which ones are absent.
required_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise ValueError(
        f"Missing required columns in {stock_symbol} data. "
        f"Expected: {required_columns}. Missing: {missing_columns}"
    )

# Parse dates and index the frame by them.
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)
# The indicators computed further down are evaluated over consecutive rows, so
# make the chronological order explicit instead of trusting the CSV's row order.
# A stable sort keeps the relative order of rows that share a date.
data = data.sort_index(kind='stable')

# Handle missing values: report the per-column counts, then drop every row
# that has a missing value in any column.
print("\nMissing values in stock data:")
print(data.isnull().sum())
data = data.dropna()  # Drop rows with missing values

# --- 2. Calculate Technical Indicators with TA-Lib ---
# Every indicator in this section is derived from the closing-price column.
close_prices = data['Close']

# Simple Moving Averages (SMA): one output column per look-back window.
for sma_column, window in (('SMA_20', 20), ('SMA_50', 50)):
    data[sma_column] = talib.SMA(close_prices, timeperiod=window)

# Exponential Moving Average (EMA), 20-day window
data['EMA_20'] = talib.EMA(close_prices, timeperiod=20)

# Relative Strength Index (RSI), 14-day window
data['RSI'] = talib.RSI(close_prices, timeperiod=14)

# Moving Average Convergence Divergence (MACD): talib.MACD returns three
# results, stored here as the MACD line, its signal line and the histogram.
macd_line, macd_signal, macd_histogram = talib.MACD(
    close_prices,
    fastperiod=12,
    slowperiod=26,
    signalperiod=9,
)
data['MACD'] = macd_line
data['MACD_Signal'] = macd_signal
data['MACD_Hist'] = macd_histogram

# --- 3. Use PyNance for Financial Metrics ---
# Daily returns of the closing price, delegated to PyNance and stored in the
# 'Daily_Return' column.
# NOTE(review): confirm that `pn.data.returns` exists in the installed PyNance
# release and whether it yields fractions or percentages -- the plot of this
# column is labelled "Daily Return (%)".
closing_series = data['Close']
data['Daily_Return'] = pn.data.returns(closing_series)

# --- 4. Visualize the Data ---
# Ensure the 'reports' directory exists before any figure is written into it.
os.makedirs('reports', exist_ok=True)


def _finalize_and_save(fig, ax, title, y_label, output_path):
    """Set title, axis labels and legend on *ax*, save *fig* to *output_path*, close it."""
    ax.set_title(title)
    ax.set_xlabel('Date')
    ax.set_ylabel(y_label)
    ax.legend()
    fig.savefig(output_path)
    plt.close(fig)


dates = data.index

# Plot 1: Close Price with SMA and EMA
price_fig, price_ax = plt.subplots(figsize=(12, 6))
for column, legend_text, line_color in (
    ('Close', 'Close Price', 'blue'),
    ('SMA_20', '20-day SMA', 'orange'),
    ('SMA_50', '50-day SMA', 'green'),
    ('EMA_20', '20-day EMA', 'red'),
):
    price_ax.plot(dates, data[column], label=legend_text, color=line_color)
_finalize_and_save(
    price_fig,
    price_ax,
    f'{stock_symbol} Stock Price with Moving Averages',
    'Price (USD)',
    f'reports/{stock_symbol}_price_moving_averages.png',
)

# Plot 2: RSI with dashed reference lines at the 70 and 30 levels
rsi_fig, rsi_ax = plt.subplots(figsize=(12, 4))
rsi_ax.plot(dates, data['RSI'], label='RSI (14-day)', color='purple')
rsi_ax.axhline(70, linestyle='--', color='red', alpha=0.5, label='Overbought (70)')
rsi_ax.axhline(30, linestyle='--', color='green', alpha=0.5, label='Oversold (30)')
_finalize_and_save(
    rsi_fig,
    rsi_ax,
    f'{stock_symbol} Relative Strength Index (RSI)',
    'RSI',
    f'reports/{stock_symbol}_rsi.png',
)

# Plot 3: MACD line, signal line and histogram bars
macd_fig, macd_ax = plt.subplots(figsize=(12, 4))
macd_ax.plot(dates, data['MACD'], label='MACD', color='blue')
macd_ax.plot(dates, data['MACD_Signal'], label='Signal Line', color='orange')
macd_ax.bar(dates, data['MACD_Hist'], label='Histogram', color='grey', alpha=0.3)
_finalize_and_save(
    macd_fig,
    macd_ax,
    f'{stock_symbol} MACD',
    'MACD',
    f'reports/{stock_symbol}_macd.png',
)

# Plot 4: Daily Returns
returns_fig, returns_ax = plt.subplots(figsize=(12, 4))
returns_ax.plot(dates, data['Daily_Return'], label='Daily Returns', color='teal')
_finalize_and_save(
    returns_fig,
    returns_ax,
    f'{stock_symbol} Daily Returns',
    'Daily Return (%)',
    f'reports/{stock_symbol}_daily_returns.png',
)

# --- 5. Save Processed Data ---
# Write the frame (original columns plus the indicator and return columns
# added above) to CSV. The target directory is created first, because
# DataFrame.to_csv does not create missing parent directories.
processed_path = f'data/processed_{stock_symbol}_historical_data.csv'
os.makedirs(os.path.dirname(processed_path), exist_ok=True)
data.to_csv(processed_path)

# Summary of what this run produced.
print(f"\nTask 2 completed for {stock_symbol}:")
print("- Technical indicators (SMA, EMA, RSI, MACD) calculated.")
print("- Daily returns computed using PyNance.")
print("- Visualizations saved to 'reports/'.")
print(f"- Processed data saved to '{processed_path}'.")

ModuleNotFoundError: No module named 'talib'