In [1]:
import yfinance as yf  
import pandas as pd  
import numpy as np  
from sklearn.preprocessing import MinMaxScaler  
from sklearn.decomposition import PCA  
from pywt import wavedec  
import os  
import matplotlib.pyplot as plt  
  
# Define the tickers
tickers = ['INDF.JK', 'BBNI.JK', 'SMAR.JK', 'ANTM.JK', 'TLKM.JK']

# Download the stock data using yfinance
data = yf.download(tickers, start='2010-01-01', end='2022-02-26')

# Reshape to long format: one row per (Date, Company).
# Stacking column level 1 (the ticker) moves it into the row index. The price
# columns keep the names they were downloaded with instead of being relabelled
# positionally: the stacked columns are not guaranteed to arrive in
# Open/High/Low/Close/Adj Close/Volume order (and 'Adj Close' may be missing),
# so a hard-coded list can silently mislabel prices or raise a length mismatch.
df = pd.DataFrame(data).stack(level=1)
df.index.names = ['Date', 'Company']
df = df.reset_index()
df.columns.name = None

# Remove hours from the date column
df['Date'] = df['Date'].dt.date

# Save the original dataframe to a CSV file
os.makedirs('data_processing', exist_ok=True)  # no-op when the folder already exists
df.to_csv('data_processing/original_data.csv', index=False)

# Only the numeric price/volume columns can be clipped, scaled and decomposed;
# 'Date' and 'Company' are identifiers and would make clip()/MinMaxScaler raise.
feature_cols = df.select_dtypes(include='number').columns
df[feature_cols] = (
    df[feature_cols]
    .fillna(0)  # replace NaN values with 0
    .clip(lower=-1e10, upper=1e10)  # clip infinite values to a finite range
)

# Scale the data using Min-Max Scaler
scaler = MinMaxScaler()
scaled_df = scaler.fit_transform(df[feature_cols])

# Apply PCA to reduce dimensionality.
# PCA rejects n_components larger than min(n_samples, n_features), so the
# requested 10 components are capped to what the data can actually support.
n_components = min(10, *scaled_df.shape)
pca = PCA(n_components=n_components)
pca_df = pca.fit_transform(scaled_df)
  
# Apply Wavelet transform to extract frequency components  
def apply_wavelet_transform(data):
    """Return the level-3 'db4' approximation coefficients of every column.

    Parameters
    ----------
    data : 2-D array indexed as ``data[:, i]``
        Each column is decomposed independently with ``wavedec``.

    Returns
    -------
    pandas.DataFrame
        One column per input column, holding the first element of the
        coefficient list returned by ``wavedec`` for that column.
    """
    column_count = data.shape[1]
    # wavedec returns a list of coefficient arrays; only element 0 is kept.
    approximations = [
        wavedec(data[:, col], 'db4', level=3)[0]
        for col in range(column_count)
    ]
    # The list is row-per-signal; transpose so each signal becomes a column.
    return pd.DataFrame(approximations).T
  
# Run the wavelet step on the PCA output
wavelet_df = apply_wavelet_transform(pca_df)

# Persist the preprocessed data as CSV, without the row index
wavelet_df.to_csv(path_or_buf='data_processing/preprocessed_data.csv', index=False)

[**********************80%*************          ]  4 of 5 completed

KeyboardInterrupt: 