In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
def calculate_technical_indicators(df, close_col='close', high_col='high', low_col='low', volume_col='volume'):
    """
    Calculate technical indicators for stock price analysis.

    The input frame is copied first, so the caller's DataFrame is never
    modified. Rows are processed in the order given; the function does not
    sort them (presumably oldest first -- verify against the caller).

    Parameters:
    df (pandas.DataFrame): DataFrame with columns for close, high, low, and volume
    close_col (str): Name of closing price column
    high_col (str): Name of high price column
    low_col (str): Name of low price column
    volume_col (str): Name of volume column

    Returns:
    pandas.DataFrame: Copy of the original data with these columns appended,
        in this order: sma_5, sma_20, sma_50, ema_12, ema_26, macd,
        macd_signal, macd_hist, rsi, lowest_low, highest_high, stoch_k,
        stoch_d, bb_middle, bb_upper, bb_lower, atr, daily_ret, obv, vwap,
        roc_5, roc_20, price_volatility, volume_volatility.
        Rolling-window indicators are NaN until their window is full.

    Raises:
    KeyError: If any of the four named columns is missing from df.
    """
    # Fail fast with one clear message instead of part-way through the work.
    missing = [col for col in (close_col, high_col, low_col, volume_col)
               if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    df = df.copy()
    close = df[close_col]
    high = df[high_col]
    low = df[low_col]
    volume = df[volume_col]

    # Trend Indicators
    # Moving Averages
    df['sma_5'] = close.rolling(window=5).mean()
    df['sma_20'] = close.rolling(window=20).mean()
    df['sma_50'] = close.rolling(window=50).mean()

    # Exponential Moving Average
    df['ema_12'] = close.ewm(span=12, adjust=False).mean()
    df['ema_26'] = close.ewm(span=26, adjust=False).mean()

    # MACD
    df['macd'] = df['ema_12'] - df['ema_26']
    df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']

    # Momentum Indicators
    # Relative Strength Index (RSI), simple-moving-average variant.
    # clip() keeps the leading NaN produced by diff(), so the first RSI value
    # is based on 14 real price changes rather than 13 plus a made-up zero.
    delta = close.diff()
    gain = delta.clip(lower=0).rolling(window=14).mean()
    loss = delta.clip(upper=0).abs().rolling(window=14).mean()
    rs = gain / loss  # loss == 0 with gain > 0 gives inf, which maps to RSI 100
    df['rsi'] = 100 - (100 / (1 + rs))

    # Stochastic Oscillator
    lookback = 14
    df['lowest_low'] = low.rolling(window=lookback).min()
    df['highest_high'] = high.rolling(window=lookback).max()
    # A zero high-low range makes %K undefined; use NaN instead of dividing
    # by zero so no +/-inf leaks into stoch_k and stoch_d.
    stoch_range = (df['highest_high'] - df['lowest_low']).replace(0, np.nan)
    df['stoch_k'] = 100 * (close - df['lowest_low']) / stoch_range
    df['stoch_d'] = df['stoch_k'].rolling(window=3).mean()

    # Volatility Indicators
    # Bollinger Bands (20-period mean +/- 2 standard deviations).
    # The 20-period mean and std are each computed once and reused.
    rolling_std_20 = close.rolling(window=20).std()
    df['bb_middle'] = df['sma_20']
    df['bb_upper'] = df['bb_middle'] + 2 * rolling_std_20
    df['bb_lower'] = df['bb_middle'] - 2 * rolling_std_20

    # Average True Range (ATR)
    prev_close = close.shift()
    ranges = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    )
    # Row-wise max skips NaN, so the first row falls back to high - low.
    true_range = ranges.max(axis=1)
    df['atr'] = true_range.rolling(14).mean()

    # Volume-based Indicators
    # On-Balance Volume (OBV): add volume on up days, subtract on down days,
    # contribute 0 when the return is zero or NaN (e.g. the first row).
    df['daily_ret'] = close.pct_change()
    df['obv'] = np.where(df['daily_ret'] > 0, volume,
                         np.where(df['daily_ret'] < 0, -volume, 0)).cumsum()

    # Volume-Weighted Average Price (VWAP), cumulative over the whole frame
    # and based on the closing price.
    df['vwap'] = (close * volume).cumsum() / volume.cumsum()

    # Price Rate of Change (percent)
    df['roc_5'] = close.pct_change(periods=5) * 100
    df['roc_20'] = close.pct_change(periods=20) * 100

    # Additional Derived Features
    df['price_volatility'] = rolling_std_20
    df['volume_volatility'] = volume.rolling(window=20).std()

    return df

In [8]:
# Add technical-indicator columns to every raw price CSV and write the result
# under the same file name in the output directory.
dir_path = 'price/raw'
out_path = 'price/raw_with_features'
os.makedirs(out_path, exist_ok=True)
# os.listdir returns entries in arbitrary order; sort for reproducible runs.
for filename in sorted(os.listdir(dir_path)):
    file_path = os.path.join(dir_path, filename)
    # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints) and non-CSV
    # files, which would otherwise make pd.read_csv fail mid-run.
    if not os.path.isfile(file_path) or not filename.lower().endswith('.csv'):
        continue
    file_out_path = os.path.join(out_path, filename)

    df = pd.read_csv(file_path)
    df = calculate_technical_indicators(df, close_col='Close', high_col='High', low_col='Low', volume_col='Volume')
    # index=False: do not write the positional index as an extra column.
    df.to_csv(file_out_path, index=False)


In [9]:
# Read the AAPL file back from the feature output directory.
apple_csv_path = os.path.join(out_path, 'AAPL.csv')
apple_df = pd.read_csv(apple_csv_path)

In [10]:
# Show the AAPL frame as this cell's output to inspect its columns.
apple_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,sma_5,sma_20,sma_50,...,bb_upper,bb_lower,atr,daily_ret,obv,vwap,roc_5,roc_20,price_volatility,volume_volatility
0,2019-12-31,72.482498,73.419998,72.379997,73.412498,72.552094,100805600,,,,...,,,,,0,73.412498,,,,
1,2020-01-02,74.059998,75.150002,73.797501,75.087502,74.207466,135480400,,,,...,,,,0.022816,135480400,74.372903,,,,
2,2020-01-03,74.287498,75.144997,74.125000,74.357498,73.486023,146322800,,,,...,,,,-0.009722,-10842400,74.367012,,,,
3,2020-01-06,73.447502,74.989998,73.187500,74.949997,74.071579,118387200,,,,...,,,,0.007968,107544800,74.504773,,,,
4,2020-01-07,74.959999,75.224998,74.370003,74.597504,73.723213,108872000,74.481000,,,...,,,,-0.004703,-1327200,74.521327,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
752,2022-12-23,130.919998,132.419998,129.639999,131.860001,131.658981,63814900,132.841998,140.6610,144.131800,...,152.246708,129.075292,4.389285,-0.002798,243000200,121.571366,-1.970109,-10.971575,5.792854,2.239374e+07
753,2022-12-27,131.380005,131.410004,128.720001,130.029999,129.831772,69007800,132.373999,139.9515,143.964799,...,152.330344,127.572656,4.229285,-0.013878,173992400,121.578265,-1.767769,-9.839136,6.189422,2.240074e+07
754,2022-12-28,129.669998,131.029999,125.870003,126.040001,125.847855,85438400,131.121999,139.1950,143.637399,...,153.024558,125.365442,4.357142,-0.030685,88554000,121.582767,-4.731672,-10.717573,6.914779,2.241224e+07
755,2022-12-29,127.989998,130.479996,127.730003,129.610001,129.412415,75703700,129.953999,138.2740,143.354599,...,152.079562,124.468438,4.489999,0.028324,164257700,121.589936,-4.311551,-12.443422,6.902781,2.132031e+07
