In [15]:
%%bash
pwd
pip install yfinance
pip install ta

/content
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [16]:
import yfinance as yf
import pandas as pd
import numpy as np
ticker_data = yf.download(tickers = "AAPL",  # list of tickers
            period = "730d",       # time period
            interval = "60m",       # trading interval
            ignore_tz = True,      # ignore timezone when aligning data from different exchanges?
            prepost = False) 
ticker_data.index = pd.DatetimeIndex(ticker_data.index).to_period('D')
print(ticker_data.shape)
ticker_data.head()

[*********************100%***********************]  1 of 1 completed
(5094, 6)


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-05-29,79.8125,80.0,79.1175,79.485001,79.485001,6272955
2020-05-29,79.485001,79.695,79.2425,79.580002,79.580002,3088345
2020-05-29,79.572502,79.887497,79.447502,79.712502,79.712502,3024228
2020-05-29,79.714973,79.894997,79.267502,79.741501,79.741501,2735059
2020-05-29,79.745003,80.065002,79.567497,79.982178,79.982178,2902417


In [17]:
import numpy as np
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from sklearn.metrics import mean_squared_error

def time_series_cross_validation(data, alpha_values):
    train_size = int(len(data) * 0.8)
    train, validation = data[:train_size], data[train_size:]
    
    best_alpha = None
    best_mse = np.inf
    
    for alpha in alpha_values:
        smoothing_model = SimpleExpSmoothing(train, initialization_method="heuristic").fit(smoothing_level=alpha, optimized=False)
        predictions = smoothing_model.forecast(len(validation))
        mse = mean_squared_error(validation, predictions)
        
        if mse < best_mse:
            best_mse = mse
            best_alpha = alpha
            
    return best_alpha, best_mse

# Test different alpha values
alpha_values = np.linspace(0.0001, 1, 1000)

# Assume 'Close' column contains the closing prices of the stock
best_alpha, best_mse = time_series_cross_validation(ticker_data['Close'], alpha_values)

print(f"Best alpha value: {best_alpha}, with MSE: {best_mse}")

Best alpha value: 1.0, with MSE: 475.9224184961105


In [18]:
# Choose the smoothing factor (alpha) based on the cross-validation results
alpha = best_alpha

def apply_smoothing(column, alpha):
    smoothing_model = SimpleExpSmoothing(column, initialization_method="heuristic")
    smoothing_model = smoothing_model.fit(optimized=True)
    return smoothing_model.fittedvalues

# Apply exponential smoothing to all numeric columns
smoothed_data = ticker_data.select_dtypes(include=[np.number]).apply(apply_smoothing, alpha=alpha)

# Add the smoothed values to the DataFrame as new columns
for col in smoothed_data.columns:
    ticker_data[col] = smoothed_data[col]

ticker_data.sample(20)



Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-07-25,153.035004,153.970001,153.009994,153.509995,153.509995,8521405.0
2022-10-18,145.339996,145.360001,140.610001,142.764999,142.764999,11984180.0
2020-10-09,115.279999,116.279999,114.919998,115.964996,115.964996,13711160.0
2022-06-14,132.279999,132.949997,131.910004,132.309998,132.309998,12396320.0
2021-06-16,130.570007,130.610001,129.759995,129.994995,129.994995,8768866.0
2022-03-01,164.695007,166.600006,164.559998,166.149704,166.149704,12363130.0
2020-07-30,95.733749,95.949997,95.407501,95.648521,95.648521,3651253.0
2021-05-17,125.419998,125.819901,125.230003,125.521004,125.521004,9877775.0
2021-05-13,124.75,125.57,124.260002,125.410004,125.410004,13819620.0
2020-11-10,116.809998,117.589996,115.809998,116.579903,116.579903,17523050.0


In [19]:
# Generate technical indicators using the TA-Lib library
from ta.utils import dropna
import ta
import pandas as pd

ticker_data = dropna(ticker_data)

# List of technical indicators to calculate
indicators = ['SMA', 'EMA', 'MACD', 'RSI', 'ADX', 'BollingerBands', 'Stochastic_Oscillator', 'Williams_R', 'Price_ROC', 'OBV']

# Create a new DataFrame to store the technical indicators
technical_indicators = pd.DataFrame(index=ticker_data.index)

# Calculate technical indicators
technical_indicators['SMA7'] = ta.trend.SMAIndicator(close=ticker_data['Close'], window=int(7*6.5)).sma_indicator()
technical_indicators['SMA21'] = ta.trend.SMAIndicator(close=ticker_data['Close'], window=int(21*6.5)).sma_indicator()
technical_indicators['EMA7'] = ta.trend.EMAIndicator(close=ticker_data['Close'], window=int(7*6.5)).ema_indicator()
technical_indicators['EMA21'] = ta.trend.EMAIndicator(close=ticker_data['Close'], window=int(21*6.5)).ema_indicator()

macd = ta.trend.MACD(close=ticker_data['Close']).macd()
macdsignal = ta.trend.MACD(close=ticker_data['Close']).macd_signal()
technical_indicators['MACD'] = macd
technical_indicators['MACD_signal'] = macdsignal

technical_indicators['RSI'] = ta.momentum.RSIIndicator(close=ticker_data['Close'], window=int(14*6.5)).rsi()
technical_indicators['ADX'] = ta.trend.ADXIndicator(high=ticker_data['High'], low=ticker_data['Low'], close=ticker_data['Close'], window=int(14*6.5)).adx()

technical_indicators['BB_upper'], technical_indicators['BB_middle'], technical_indicators['BB_lower'] = ta.volatility.BollingerBands(close=ticker_data['Close'], window=int(20*6.5)).bollinger_mavg(), ta.volatility.BollingerBands(close=ticker_data['Close'], window=int(20*6.5)).bollinger_hband(), ta.volatility.BollingerBands(close=ticker_data['Close'], window=int(20*6.5)).bollinger_lband()

# Adding Stochastic Oscillator, Williams %R, Price Rate of Change, and On Balance Volume
# Stochastic Oscillator has a window of 2 weeks
technical_indicators['Stochastic_Oscillator'] = ta.momentum.StochasticOscillator(high=ticker_data['High'], low=ticker_data['Low'], close=ticker_data['Close'], window=int(14*6.5)).stoch()
# Williams %R has a window of 2 weeks
technical_indicators['Williams_R'] = ta.momentum.WilliamsRIndicator(high=ticker_data['High'], low=ticker_data['Low'], close=ticker_data['Close'], lbp=int(14*6.5)).williams_r()
technical_indicators['Price_ROC'] = ta.momentum.ROCIndicator(close=ticker_data['Close'], window=int(12*6.5)).roc()
technical_indicators['OBV'] = ta.volume.OnBalanceVolumeIndicator(close=ticker_data['Close'], volume=ticker_data['Volume']).on_balance_volume()


  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [20]:
technical_indicators = technical_indicators.dropna()
technical_indicators.reset_index(inplace=True)
technical_indicators.rename(columns={'index': 'Datetime'}, inplace=True)
technical_indicators.sample(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  technical_indicators.rename(columns={'index': 'Datetime'}, inplace=True)


Unnamed: 0,Datetime,SMA7,SMA21,EMA7,EMA21,MACD,MACD_signal,RSI,ADX,BB_upper,BB_middle,BB_lower,Stochastic_Oscillator,Williams_R,Price_ROC,OBV
2867,2022-02-10,174.052038,169.313125,173.499651,171.457567,0.391238,0.586725,51.409831,7.976208,169.117214,180.612676,157.621753,82.985961,-17.014039,6.266499,380377200.0
3056,2022-03-22,159.369476,161.000148,161.831491,161.932428,2.514027,2.218189,53.592205,9.430899,160.956309,169.842528,152.07009,98.054704,-1.945296,4.37284,256309100.0
890,2020-12-28,129.82944,125.211243,130.289597,125.883539,1.770832,1.446998,60.502721,8.713953,125.557302,133.315692,117.798913,98.93339,-1.06661,12.359386,271660000.0
456,2020-09-28,109.413156,115.978435,110.712297,113.388849,0.958686,0.480102,49.850533,17.455901,115.35456,129.946648,100.762472,58.906901,-41.093099,0.818109,71953000.0
3170,2022-04-13,170.177116,171.490593,170.204354,170.157846,-0.049904,-0.615265,50.857353,8.513784,171.996852,180.361588,163.632116,37.562033,-62.437967,-3.964016,408566200.0
1104,2021-02-11,135.829345,135.518814,135.939298,135.081032,-0.224442,-0.111498,51.054255,5.748755,135.75812,144.340016,127.176224,35.868102,-64.131898,-5.527116,168682200.0
1926,2021-07-30,146.909956,145.572472,146.190382,144.24648,-0.42098,-0.459076,53.473577,14.286254,145.854173,150.126708,141.581638,47.899215,-52.100785,-1.581079,154857500.0
1268,2021-03-17,122.195338,123.45951,122.768948,124.280065,0.532416,0.872722,47.961573,16.358639,123.200075,129.127051,117.2731,53.789086,-46.210914,-2.656602,-25555910.0
3768,2022-08-17,169.986917,162.789913,170.338452,163.472582,1.099836,1.216958,61.54793,14.714009,163.193704,176.509346,149.878062,95.546532,-4.453468,8.226424,438266900.0
2959,2022-03-02,163.188233,169.072368,164.670125,167.139737,0.634848,0.294064,49.3962,10.863005,168.792821,178.417229,159.168412,68.082036,-31.917964,-1.236397,289241800.0


In [21]:
# Perform normalization
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
technical_indicators_scaled = pd.DataFrame(scaler.fit_transform(technical_indicators.drop(columns=['Datetime'])), columns=technical_indicators.drop(columns=['Datetime']).columns)

technical_indicators_scaled

Unnamed: 0,SMA7,SMA21,EMA7,EMA21,MACD,MACD_signal,RSI,ADX,BB_upper,BB_middle,BB_lower,Stochastic_Oscillator,Williams_R,Price_ROC,OBV
0,0.000000,0.000000,0.000000,0.000000,0.552075,0.584861,0.698980,0.000000,0.000000,0.000000,0.000000,0.746363,0.746363,0.542701,0.373925
1,0.000655,0.000907,0.000680,0.000749,0.549502,0.577453,0.714546,0.000000,0.000896,0.000788,0.000984,0.775736,0.775736,0.533120,0.383872
2,0.001205,0.001799,0.001241,0.001456,0.544950,0.570495,0.701300,0.000000,0.001803,0.001421,0.002148,0.754836,0.754836,0.530022,0.374428
3,0.001671,0.002663,0.001655,0.002112,0.538229,0.563405,0.683509,0.000000,0.002652,0.002018,0.003233,0.720832,0.720832,0.525586,0.365528
4,0.002211,0.003558,0.002302,0.002843,0.537826,0.557642,0.708755,0.000000,0.003563,0.002693,0.004362,0.772108,0.772108,0.518476,0.374309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4954,0.844664,0.853914,0.849328,0.860852,0.516780,0.564585,0.514921,0.360972,0.854976,0.821165,0.868898,0.633651,0.633651,0.475959,0.837815
4955,0.845848,0.854401,0.848993,0.861205,0.499998,0.550010,0.508823,0.359011,0.855407,0.821325,0.869594,0.614450,0.614450,0.485810,0.826518
4956,0.846183,0.854868,0.848610,0.861530,0.485443,0.535051,0.503810,0.356934,0.855792,0.821495,0.870188,0.598747,0.598747,0.499273,0.815685
4957,0.846636,0.855432,0.848474,0.861929,0.479090,0.521643,0.519578,0.355029,0.856245,0.821707,0.870875,0.655727,0.655727,0.496781,0.826252


In [24]:
from google.colab import drive
drive.mount('/content/drive')

# Save the data to CSV files
technical_indicators_scaled.to_csv('/content/drive/My Drive/technical_indicators_scaled.csv', index=False)
ticker_data.to_csv('/content/drive/My Drive/ticker_data.csv', index=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
