In [2]:
import pandas as pd
import numpy as np
import ta # Import the Technical Analysis library
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the AAPL price history; 'Date' is used as the index and
# parse_dates=True asks pandas to parse that index as dates.
df = pd.read_csv('AAPL_historical_data.csv', parse_dates=True, index_col='Date')

# When the columns carry a ticker suffix (detected via 'Open_AAPL'), map them
# back to the plain names that the feature-engineering code looks up.
current_columns = list(df.columns)
if 'Open_AAPL' in current_columns:
    rename_map = {
        f'{field}_AAPL': field
        for field in ('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume')
    }
    # Keys that are absent from the frame are ignored by rename().
    df = df.rename(columns=rename_map)

df.head()


FileNotFoundError: [Errno 2] No such file or directory: 'AAPL_historical_data.csv'

In [27]:
def _append_feature_columns(frame):
    """Add indicator, lag and next-day-change columns to ``frame`` in place.

    Reads the 'Close', 'High', 'Low' and 'Volume' columns and inserts the new
    columns in a fixed order: trend, volatility, momentum, volume, the basic
    lag/rolling features, and finally the forward-looking percentage change.
    """
    closes = frame['Close']
    highs = frame['High']
    lows = frame['Low']

    # --- Trend indicators ---
    # MACD line, its signal line and their difference (library default windows).
    for label, macd_fn in (
        ('MACD', ta.trend.macd),
        ('MACD_Signal', ta.trend.macd_signal),
        ('MACD_Diff', ta.trend.macd_diff),
    ):
        frame[label] = macd_fn(closes)

    # Exponential moving averages: short (12) and long (26) periods.
    for span in (12, 26):
        frame[f'EMA_{span}'] = ta.trend.ema_indicator(closes, window=span)

    # ADX together with its positive and negative directional indicators.
    for label, adx_fn in (
        ('ADX', ta.trend.adx),
        ('ADX_pos', ta.trend.adx_pos),
        ('ADX_neg', ta.trend.adx_neg),
    ):
        frame[label] = adx_fn(highs, lows, closes, window=14)

    # --- Volatility indicators ---
    # Bollinger Bands: lower band, middle average, upper band, width, %B.
    band_args = {'window': 20, 'window_dev': 2}
    frame['BBL'] = ta.volatility.bollinger_lband(closes, **band_args)
    frame['BBM'] = ta.volatility.bollinger_mavg(closes, window=20)
    frame['BBH'] = ta.volatility.bollinger_hband(closes, **band_args)
    frame['BB_bandwidth'] = ta.volatility.bollinger_wband(closes, **band_args)
    frame['BB_percent'] = ta.volatility.bollinger_pband(closes, **band_args)

    # Average True Range.
    frame['ATR'] = ta.volatility.average_true_range(highs, lows, closes, window=14)

    # --- Momentum indicators ---
    # Relative Strength Index.
    frame['RSI'] = ta.momentum.rsi(closes, window=14)

    # Stochastic oscillator (%K) and its signal line (%D).
    stoch_args = {'window': 14, 'smooth_window': 3}
    frame['Stoch_K'] = ta.momentum.stoch(highs, lows, closes, **stoch_args)
    frame['Stoch_D'] = ta.momentum.stoch_signal(highs, lows, closes, **stoch_args)

    # Rate of Change.
    frame['ROC'] = ta.momentum.roc(closes, window=12)

    # --- Volume indicators ---
    # On-Balance Volume.
    frame['OBV'] = ta.volume.on_balance_volume(closes, frame['Volume'])

    # --- Basic features, all derived from 'Close' ---
    # Previous day's close.
    frame['Prev_Close'] = closes.shift(1)

    # 10-day simple moving average.
    frame['SMA_10'] = closes.rolling(window=10).mean()

    # --- Basis for the 2-class (Up/Down) target ---
    # Percentage change from today's close to the NEXT row's close. This value
    # looks one row into the future, and the last row has no next close (NaN).
    following_close = closes.shift(-1)
    frame['Daily_Change_Pct'] = (following_close - closes) / closes * 100


_append_feature_columns(df)

# For a 2-class problem, we're simply interested if it goes up or down.
# You could still use a small threshold to filter out tiny, insignificant moves if desired,
# but for simplicity, let's just make it purely binary for now.

def classify_binary_movement(change_pct):
    """Turn a percentage change into a binary Up/Down label.

    Returns 1 (Up) only when ``change_pct`` is strictly positive. Every other
    value returns 0 (Down): a negative change, an exactly flat change, and
    also NaN, because ``NaN > 0`` evaluates to False.
    """
    return 1 if change_pct > 0 else 0

# Label every row by applying the classifier to the next-day change. Rows whose
# next-day change is unknown (NaN - e.g. the final row, which has no following
# close) are masked to NaN rather than keeping the classifier's fallback label
# of 0, so an unknown outcome is never recorded as a "Down" day.
df['Target'] = (
    df['Daily_Change_Pct']
    .apply(classify_binary_movement)
    .where(df['Daily_Change_Pct'].notna())
)

# Drop every row that still contains a NaN (rows where lagged/rolling features
# are not yet defined, plus the unlabeled last row). With no NaNs left, Target
# is cast back to an integer 0/1 column.
df_final = df.dropna().astype({'Target': 'int64'})

print("\n--- DataFrame with Advanced Features and Target ---")
print(df_final.head())
print("\n--- Info for Final DataFrame ---")
df_final.info()
print("\n--- Missing Values Check in Final DataFrame ---")
print(df_final.isnull().sum().sum()) # Should be 0


--- DataFrame with Advanced Features and Target ---
                Close       High        Low       Open     Volume      MACD  \
Date                                                                          
2020-02-20  77.628288  78.682561  77.121754  78.192994  100566000  0.843612   
2020-02-21  75.871155  77.664633  75.253137  77.221108  129554000  0.625892   
2020-02-24  72.267265  73.721433  70.098137  72.044297  222195200  0.160691   
2020-02-25  69.819412  73.321535  69.346812  72.938607  230673600 -0.400884   
2020-02-26  70.926994  72.194546  69.436475  69.443745  198054800 -0.747942   

            MACD_Signal  MACD_Diff     EMA_12     EMA_26  ...       ATR  \
Date                                                      ...             
2020-02-20     0.950526  -0.106914  77.817074  76.973463  ...  1.628808   
2020-02-21     0.885599  -0.259707  77.517702  76.891810  ...  1.684715   
2020-02-24     0.740618  -0.579926  76.709943  76.549251  ...  1.976736   
2020-02-25     0.5

Explanation of New Features:
Trend Indicators:
MACD (Moving Average Convergence Divergence): Shows the relationship between two moving averages of a security's price. It's used to identify momentum, direction, and duration of a trend.

EMA (Exponential Moving Average): Similar to SMA but gives more weight to recent prices. Often used in pairs (e.g., 12-day and 26-day EMA) for crossovers.

ADX (Average Directional Movement Index): Measures the strength of a trend, not its direction. It has positive and negative directional indicators (+DI and -DI).

Volatility Indicators:
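Bollinger Bands (BBL, BBM, BBH, BB_bandwidth, BB_percent): Show the deviation of price from a central moving average. BBM is the 20-day moving average, and BBL and BBH are the lower and upper bands placed two standard deviations below and above it. BB_bandwidth is the width of the bands relative to the middle band, and BB_percent is the position of the close within the bands. Useful for identifying overbought/oversold conditions and volatility.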

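ATR (Average True Range): Measures market volatility by averaging the true range over a given period (14 days here). The true range is the largest of the current high-low span and the gaps between the previous close and the current high or low.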

Momentum Indicators:
RSI (Relative Strength Index): A momentum oscillator that measures the speed and change of price movements. It indicates overbought (>70) or oversold (<30) conditions.

Stochastic Oscillator (Stoch_K, Stoch_D): Compares a security's closing price to its price range over a given time period. It also identifies overbought/oversold conditions.

ROC (Rate of Change): Measures the percentage change between the current price and the price a certain number of periods ago.

Volume Indicators:
OBV (On-Balance Volume): Relates volume to price changes. It's a cumulative total of volume in which each day's volume is added when the close rises and subtracted when the close falls; it is used to confirm price trends.

In [28]:
# Persist the engineered feature table; index=True writes the index as the
# first CSV column.
df_final.to_csv(path_or_buf='AAPL_advanced_features.csv', index=True)