In [None]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# ---------------------------------------------------------------------------
# Preprocess TSLA closing prices for a 4-client federated setup.
#
# Produces, under `output_dir`:
#   X_client_{i}.pkl / y_client_{i}.pkl  - sliding-window samples per client
#   X_test_seq.pkl / y_test_seq.pkl      - sliding-window samples, held-out tail
#   close_scaler.pkl                     - the fitted MinMaxScaler
# ---------------------------------------------------------------------------


def make_sequences(series, window):
    """Build supervised (X, y) pairs from a 1-D series with a sliding window.

    X[k] holds `window` consecutive values (series[k:k + window]) and y[k] is
    the value immediately following them (series[k + window]).

    Args:
        series: 1-D array-like of values in chronological order.
        window: number of past values used to predict the next one.

    Returns:
        Tuple (X, y) of numpy arrays with shapes (n, window) and (n,), where
        n = max(len(series) - window, 0).
    """
    series = np.asarray(series)
    windows = [series[i - window:i] for i in range(window, len(series))]
    # reshape keeps a well-formed (0, window) array when there are no samples.
    return np.array(windows).reshape(-1, window), np.array(series[window:])


# Configuration
file_path = "../data/stock_data/TSLA.csv"
output_dir = "../processed_data"
lookback = 60            # past time steps fed to the model per sample
num_clients = 4
train_fraction = 0.8     # leading share of the series available for training
overlap_fraction = 0.2   # share of each client's slice shared with the next

# Load the dataset
df = pd.read_csv(file_path, parse_dates=["Date"], index_col="Date")

# Handle missing values first, so dropped rows influence neither the scaler
# nor the split boundaries.
df.dropna(inplace=True)

total_len = len(df)
train_size = int(total_len * train_fraction)

# Scale the closing price. The scaler is fitted on the training rows only so
# that the held-out tail does not leak its min/max into the training features;
# as a consequence scaled test values may fall outside [0, 1].
close_scaler = MinMaxScaler(feature_range=(0, 1))
close_scaler.fit(df[["Close"]].iloc[:train_size])
df["Close_Scaled"] = close_scaler.transform(df[["Close"]])

os.makedirs(output_dir, exist_ok=True)

# Split the training region into overlapping client datasets. Clients start
# every (client_size - overlap) rows; each slice is clamped to `train_size`
# so no client ever sees rows belonging to the test set.
client_size = int(train_size / (num_clients * (1 - overlap_fraction)))
overlap = int(client_size * overlap_fraction)
clients_data = []
for client_id in range(num_clients):
    start = client_id * (client_size - overlap)
    end = min(start + client_size, train_size)
    client_df = df.iloc[start:end]
    X, y = make_sequences(client_df["Close_Scaled"].values, lookback)
    clients_data.append((X, y))
    joblib.dump(X, f"{output_dir}/X_client_{client_id}.pkl")
    joblib.dump(y, f"{output_dir}/y_client_{client_id}.pkl")
    print(f"Client {client_id}: {len(y)} samples saved.")

# Save test data (last 20%). The first `lookback` test rows serve only as
# context for the first prediction target.
test_data = df["Close_Scaled"].values[train_size:]
X_test, y_test = make_sequences(test_data, lookback)
joblib.dump(X_test, f"{output_dir}/X_test_seq.pkl")
joblib.dump(y_test, f"{output_dir}/y_test_seq.pkl")
joblib.dump(close_scaler, f"{output_dir}/close_scaler.pkl")

print("Data preprocessing complete for 4 clients and test set!")

Client 0: 647 samples saved.
Client 1: 647 samples saved.
Client 2: 647 samples saved.
Client 3: 506 samples saved.
Data preprocessing complete for 4 clients and test set!


: 