In [1]:
import copy
import os

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Directory holding the per-ticker CSV files, and the tickers to look for in it.
data_dir = "data"
stock_symbols = [
    "AAPL",
    "GOOGL",
    "MSFT",
]

# Min-max scaler configured for a [0, 1] output range.
scaler = MinMaxScaler(feature_range=(0, 1))

# Results container, keyed by ticker symbol; starts empty.
processed_data = dict()


In [2]:
# Snapshot of the fitted scaler for every processed symbol. `scaler` is
# re-fitted on each iteration, so without these snapshots only the last
# symbol's min/max parameters would survive the loop, and no other symbol
# could be mapped back to its original scale with `inverse_transform`.
fitted_scalers = {}

for symbol in stock_symbols:
    file_path = os.path.join(data_dir, f'{symbol}_data.csv')

    # A missing file is reported and skipped; the other symbols still run.
    if not os.path.exists(file_path):
        print(f"Data for {symbol} not found at {file_path}")
        continue

    # Load the dataset, using the 'Date' column as a parsed datetime index.
    raw_df = pd.read_csv(file_path, index_col='Date', parse_dates=True)

    # Drop rows with missing values. The result gets its own name so the
    # frame as loaded is not overwritten.
    df = raw_df.dropna()

    # fit_transform raises on an input with zero rows, which would abort
    # the loop for every remaining symbol; skip this symbol instead.
    if df.empty:
        print(f"No complete rows for {symbol} in {file_path}; skipping")
        continue

    # Scale every column to the scaler's feature range. The fit is done per
    # symbol, so each symbol is scaled against its own min/max.
    scaled_values = scaler.fit_transform(df)

    # Keep an independent copy of the scaler as fitted for this symbol.
    fitted_scalers[symbol] = copy.deepcopy(scaler)

    # fit_transform returns a bare array; restore the index and column labels.
    df_scaled = pd.DataFrame(scaled_values, index=df.index, columns=df.columns)

    # Store the processed data
    processed_data[symbol] = df_scaled
    print(f"Processed data for {symbol}")


Processed data for AAPL
Processed data for GOOGL
Processed data for MSFT


In [3]:
# Preview: print the first rows of every processed frame.
for symbol in processed_data:
    data = processed_data[symbol]
    print(f"Processed data for {symbol}:\n{data.head()}\n")

# Persist: write each processed frame to '<symbol>_processed_data.csv'
# inside data_dir and report where it went.
for symbol in processed_data:
    data = processed_data[symbol]
    processed_file_path = os.path.join(data_dir, f'{symbol}_processed_data.csv')
    data.to_csv(processed_file_path)
    print(f"Processed data for {symbol} saved to {processed_file_path}")


Processed data for AAPL:
                Open      High       Low     Close  Adj Close    Volume
Date                                                                   
2020-01-02  0.126588  0.131229  0.149485  0.137767   0.132509  0.256276
2020-01-03  0.128278  0.131193  0.151856  0.132473   0.127380  0.283984
2020-01-06  0.122038  0.130064  0.145068  0.136770   0.131543  0.212595
2020-01-07  0.133274  0.131775  0.153630  0.134213   0.129066  0.188279
2020-01-08  0.128297  0.138218  0.153051  0.142917   0.137497  0.247584

Processed data for GOOGL:
                Open      High       Low     Close  Adj Close    Volume
Date                                                                   
2020-01-02  0.148347  0.153649  0.171458  0.161919   0.161919  0.157751
2020-01-03  0.148139  0.156231  0.171880  0.158233   0.158233  0.123771
2020-01-06  0.149982  0.168741  0.173749  0.176914   0.176914  0.328885
2020-01-07  0.174787  0.171378  0.194347  0.175524   0.175524  0.219672
2020-01-08  