In [6]:
# Cell 1: Load Stock Data and Initial Preparation

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set plotting style
# seaborn's "whitegrid" style is applied first; the matplotlib style sheet
# 'seaborn-v0_8-deep' is applied afterwards, so any setting it defines is the
# one in effect for later plots.
sns.set_style("whitegrid")
plt.style.use('seaborn-v0_8-deep')


# 1. Define File Path and Load Data
# We will start with Apple (AAPL) as an example
STOCK_TICKER = 'AAPL'
STOCK_FILE_PATH = f'../data/yfinance_data/{STOCK_TICKER}.csv'

# Contract for later cells: df_stock is either a fully prepared DataFrame
# (indexed by the parsed 'Date' column) or None. Every failure path below resets
# it to None so that a failed (re-)run never leaves a stale or half-prepared
# frame in the kernel.
try:
    # Load the stock data (expects a CSV file at STOCK_FILE_PATH)
    stock_raw = pd.read_csv(STOCK_FILE_PATH)

    # 2. Convert Date Column to Datetime and Set as Index
    # The date column must be named 'Date'; if it is absent the resulting
    # KeyError is reported by the generic handler below.
    stock_raw['Date'] = pd.to_datetime(stock_raw['Date'])
    # Bind df_stock only once preparation has succeeded (no in-place mutation).
    df_stock = stock_raw.set_index('Date')

    # 3. Initial Inspection
    print(f"--- Data for {STOCK_TICKER} Loaded ---")
    print(f"\nDataFrame shape: {df_stock.shape}")
    print("\nFirst 5 rows of the dataset:")
    print(df_stock.head())
    print("\nData types and null counts:")
    df_stock.info()

except FileNotFoundError:
    print(f"Error: File not found at {STOCK_FILE_PATH}. Please check the path and file extension.")
    df_stock = None
except Exception as e:
    print(f"An error occurred during data loading or preparation: {e}")
    # Same outcome as the missing-file case: downstream cells can test for None.
    df_stock = None

--- Data for AAPL Loaded ---

DataFrame shape: (3774, 5)

First 5 rows of the dataset:
               Close      High       Low      Open      Volume
Date                                                          
2009-01-02  2.721686  2.730385  2.554037  2.575630   746015200
2009-01-05  2.836553  2.884539  2.780469  2.794266  1181608400
2009-01-06  2.789767  2.914229  2.770872  2.877641  1289310400
2009-01-07  2.729484  2.774170  2.706990  2.753477   753048800
2009-01-08  2.780169  2.793666  2.700393  2.712090   673500800

Data types and null counts:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3774 entries, 2009-01-02 to 2023-12-29
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   3774 non-null   float64
 1   High    3774 non-null   float64
 2   Low     3774 non-null   float64
 3   Open    3774 non-null   float64
 4   Volume  3774 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 176.9 KB


In [7]:
# Cell 2: Process All 6 Stocks - Calculate Technical Indicators and Export Data

import talib as ta

# Tickers to run through the indicator pipeline, in processing order
TICKERS = [
    'AAPL',
    'AMZN',
    'GOOG',
    'META',
    'MSFT',
    'NVDA',
]

# Directory holding the raw per-ticker CSV files
STOCK_DATA_DIR = '../data/yfinance_data/'

# Maps ticker -> processed DataFrame, kept for visualization / later steps
processed_data_frames = dict()

def process_stock_data(ticker, data_dir=None):
    """Load one ticker's raw CSV, add TA-Lib indicators, drop NaN rows, and export.

    Reads ``<data_dir>/<ticker>.csv``, indexes it by the parsed 'Date' column in
    chronological order, adds the columns 'SMA_20', 'RSI', 'MACD', 'MACD_Signal'
    and 'MACD_Hist' (all computed from 'Close'), drops every row that contains a
    NaN, and writes the result to ``<data_dir>/processed_<ticker>_data.csv``.

    Args:
        ticker: Ticker symbol; also the stem of the raw CSV file name.
        data_dir: Directory containing the raw CSV and receiving the processed
            file. Defaults to the module-level STOCK_DATA_DIR.

    Returns:
        The processed DataFrame, or None when the raw file does not exist or
        lacks a required column ('Date', 'Close'). In both cases a message is
        printed and nothing is written.
    """
    import os  # local import: only needed here, for portable path joining

    print(f"\n--- Processing {ticker} ---")

    if data_dir is None:
        data_dir = STOCK_DATA_DIR

    # 1. Load Data
    raw_file_path = os.path.join(data_dir, f'{ticker}.csv')
    try:
        prices = pd.read_csv(raw_file_path)
    except FileNotFoundError:
        print(f"Error: Raw data file not found for {ticker}.")
        return None

    # Fail softly on a malformed file so one bad ticker does not abort a batch.
    missing_columns = [col for col in ('Date', 'Close') if col not in prices.columns]
    if missing_columns:
        print(f"Error: {ticker} data is missing required column(s): {', '.join(missing_columns)}.")
        return None

    # 2. Data Preparation
    prices['Date'] = pd.to_datetime(prices['Date'])
    # The indicators below are computed over consecutive rows, so the rows must
    # be in chronological order regardless of how the file was written.
    prices = prices.set_index('Date').sort_index()
    prices['Close'] = prices['Close'].astype(float)

    # 3. Calculate Technical Indicators (Task 2 Analysis)
    close = prices['Close']
    prices['SMA_20'] = ta.SMA(close, timeperiod=20)
    prices['RSI'] = ta.RSI(close, timeperiod=14)
    macd, macdsignal, macdhist = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    prices['MACD'] = macd
    prices['MACD_Signal'] = macdsignal
    prices['MACD_Hist'] = macdhist

    # 4. Cleaning and Exporting
    # Drop NaNs that result from indicator calculation. Note that dropna()
    # removes a row if ANY column is NaN, so gaps in the raw columns are
    # dropped here as well.
    df_processed = prices.dropna().copy()
    processed_file_path = os.path.join(data_dir, f'processed_{ticker}_data.csv')
    df_processed.to_csv(processed_file_path)

    print(f"✅ Success: Saved processed data to {processed_file_path}. Shape: {df_processed.shape}")
    return df_processed

# Execute the function for all tickers, remembering which ones produced no data
failed_tickers = []
for ticker in TICKERS:
    df = process_stock_data(ticker)
    if df is not None:
        processed_data_frames[ticker] = df
    else:
        failed_tickers.append(ticker)

# Report the real outcome: the count comes from TICKERS rather than a literal,
# and the "all complete" banner is only printed when every ticker succeeded.
if failed_tickers:
    succeeded = len(TICKERS) - len(failed_tickers)
    print(f"\n--- Stock Data Processing Finished: {succeeded} of {len(TICKERS)} succeeded; failed: {', '.join(failed_tickers)} ---")
else:
    print(f"\n--- All {len(TICKERS)} Stock Data Processing Complete ---")


--- Processing AAPL ---
✅ Success: Saved processed data to ../data/yfinance_data/processed_AAPL_data.csv. Shape: (3741, 10)

--- Processing AMZN ---
✅ Success: Saved processed data to ../data/yfinance_data/processed_AMZN_data.csv. Shape: (3741, 10)

--- Processing GOOG ---
✅ Success: Saved processed data to ../data/yfinance_data/processed_GOOG_data.csv. Shape: (3741, 10)

--- Processing META ---
✅ Success: Saved processed data to ../data/yfinance_data/processed_META_data.csv. Shape: (2890, 10)

--- Processing MSFT ---
✅ Success: Saved processed data to ../data/yfinance_data/processed_MSFT_data.csv. Shape: (3741, 10)

--- Processing NVDA ---
✅ Success: Saved processed data to ../data/yfinance_data/processed_NVDA_data.csv. Shape: (3741, 10)

--- All 6 Stock Data Processing Complete ---
