In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# output folder under /content/drive/MyDrive can be written to.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Destination folder on the mounted Drive for the per-stock CSV files.
# exist_ok=True makes re-running this cell harmless when the folder already exists.
output_dir = "/content/drive/MyDrive/AutoformerInput"
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Open the Colab upload dialog for the three input CSVs; the files are then
# read by relative name from the working directory.
# NOTE(review): `uploaded` is not referenced again in the visible cells.
# NOTE(review): re-uploading a file whose name already exists may be stored
# under a different name by Colab, leaving the older copy to be read — confirm.
from google.colab import files
uploaded = files.upload()

Saving aligned_all_stock_features.csv to aligned_all_stock_features.csv
Saving aligned_exogenous_features.csv to aligned_exogenous_features.csv
Saving aligned_nasdaq10_log_returns.csv to aligned_nasdaq10_log_returns.csv


In [None]:
# Load the three aligned input tables, parsing "Date" as datetime in each.
# Order of the file names matches the order of the names on the left.
features_df, exogenous_df, returns_df = (
    pd.read_csv(csv_name, parse_dates=["Date"])
    for csv_name in (
        "aligned_all_stock_features.csv",
        "aligned_exogenous_features.csv",
        "aligned_nasdaq10_log_returns.csv",
    )
)


In [None]:
# Join the three tables on "Date" (inner join: only dates present in all three
# survive). validate="one_to_one" makes pandas raise MergeError if any table
# contains a repeated date, instead of silently multiplying rows.
merged_df = (
    features_df
    .merge(exogenous_df, on="Date", validate="one_to_one")
    .merge(returns_df, on="Date", validate="one_to_one")
)


In [None]:
# Calendar features derived from "Date":
#   day_index          1-based running row counter
#   day_of_week_*      sine/cosine of the weekday (0-6) on a 7-step cycle
#   day_of_year_*      sine/cosine of the day of year on a 365-step cycle
# The raw integer weekday / day-of-year columns are only intermediates and are
# removed again at the end.
# NOTE(review): in leap years day 366 lands slightly past one full 365-step cycle.
merged_df = (
    merged_df
    .assign(
        day_index=np.arange(1, len(merged_df) + 1),
        day_of_week=lambda frame: frame["Date"].dt.dayofweek,
        day_of_year=lambda frame: frame["Date"].dt.dayofyear,
    )
    .assign(
        day_of_week_sin=lambda frame: np.sin(2 * np.pi * frame["day_of_week"] / 7),
        day_of_week_cos=lambda frame: np.cos(2 * np.pi * frame["day_of_week"] / 7),
        day_of_year_sin=lambda frame: np.sin(2 * np.pi * frame["day_of_year"] / 365),
        day_of_year_cos=lambda frame: np.cos(2 * np.pi * frame["day_of_year"] / 365),
    )
    .drop(columns=["day_of_week", "day_of_year"])
)


In [None]:
# Ticker symbol -> company name. The ticker is looked up as a column of the
# merged table; the company name is the prefix of that stock's feature columns
# and the stem of its output file.
stock_name_map = dict(
    AAPL="Apple",
    AMZN="Amazon",
    MSFT="Microsoft",
    META="Meta",
    GOOGL="Google",
    COST="Costco",
    TSLA="Tesla",
    AMGN="Amgen",
    NFLX="Netflix",
    SBUX="Starbucks",
)

In [None]:
# Export one CSV per stock. Each file holds the shared market/calendar columns,
# that company's own feature columns with the "<Company>_" prefix removed, and
# the stock's return series as the last column, named "log_return".

# Columns present in every per-stock file. "day_index" is not part of the
# exported files, so it is simply not selected.
shared_cols = [
    "Date", "SPY_ret", "QQQ_ret", "VIX",
    "day_of_week_sin", "day_of_week_cos",
    "day_of_year_sin", "day_of_year_cos",
]
saved_count = 0

for ticker, company in stock_name_map.items():
    if ticker not in merged_df.columns:
        print(f"❌ Skipping {company} — ticker '{ticker}' not found in returns")
        continue

    # Feature columns are the ones carrying this company's prefix.
    prefix = f"{company}_"
    feature_cols = [col for col in merged_df.columns if col.startswith(prefix)]

    # Remove only the leading prefix; a str.replace would also delete any later
    # occurrence of "<Company>_" inside a feature name.
    rename_map = {col: col[len(prefix):] for col in feature_cols}
    # The target column is called "log_return" in every file, so all per-stock
    # CSVs share one schema.
    rename_map[ticker] = "log_return"

    stock_df = (
        merged_df[shared_cols + feature_cols + [ticker]]
        .rename(columns=rename_map)
        .dropna()  # rows with any missing value are excluded from the file
    )

    # Save to Drive
    output_path = os.path.join(output_dir, f"{company}.csv")
    stock_df.to_csv(output_path, index=False)
    print(f"✅ Saved: {output_path}")
    saved_count += 1

# Only claim full success when no ticker was skipped.
if saved_count == len(stock_name_map):
    print("🎉 All per-stock files saved to Google Drive!")
else:
    print(f"⚠️ Saved {saved_count} of {len(stock_name_map)} per-stock files — see skipped tickers above.")

✅ Saved: /content/drive/MyDrive/AutoformerInput/Apple.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Amazon.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Microsoft.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Meta.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Google.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Costco.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Tesla.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Amgen.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Netflix.csv
✅ Saved: /content/drive/MyDrive/AutoformerInput/Starbucks.csv
🎉 All per-stock files saved to Google Drive!


day_index