In [1]:
# Import necessary libraries
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, Subset
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Define parameters (equivalent to class initialization arguments)
csv_file = 'data.csv'  # Replace with actual CSV file path
sequence_lengths = [5, 20, 60]
target_asset = 'Platinum_Vol.'
target_horizon = 1
test_size = 0.2
val_size = 0.1

In [16]:
# Load data from CSV
df = pd.read_csv(csv_file)
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')
df = df.sort_values('Date')  # Ensure chronological order
for col in df.columns:
    if col != 'Date':
        try:
            df[col] = df[col].astype(str).str.replace(',', '').astype(float)
        except:
            continue




In [17]:
# Handle missing values
df = df.ffill()
df = df.bfill()

# Replace NaNs in volume columns with 0
for col in df.columns:
    if 'Vol' in col:
        df[col] = df[col].fillna(0)


# Check for remaining NaNs
print("Remaining NaNs:\n", df.isna().sum())

Remaining NaNs:
 Unnamed: 0           0
Date                 0
Natural_Gas_Price    0
Natural_Gas_Vol.     0
Crude_oil_Price      0
Crude_oil_Vol.       0
Copper_Price         0
Copper_Vol.          0
Bitcoin_Price        0
Bitcoin_Vol.         0
Platinum_Price       0
Platinum_Vol.        0
Ethereum_Price       0
Ethereum_Vol.        0
S&P_500_Price        0
Nasdaq_100_Price     0
Nasdaq_100_Vol.      0
Apple_Price          0
Apple_Vol.           0
Tesla_Price          0
Tesla_Vol.           0
Microsoft_Price      0
Microsoft_Vol.       0
Silver_Price         0
Silver_Vol.          0
Google_Price         0
Google_Vol.          0
Nvidia_Price         0
Nvidia_Vol.          0
Berkshire_Price      0
Berkshire_Vol.       0
Netflix_Price        0
Netflix_Vol.         0
Amazon_Price         0
Amazon_Vol.          0
Meta_Price           0
Meta_Vol.            0
Gold_Price           0
Gold_Vol.            0
dtype: int64


In [18]:

volume_columns = [
    "Natural_Gas_Vol.",    "Crude_oil_Vol.",    "Copper_Vol.",        "Platinum_Vol.",        "Nasdaq_100_Vol.",
    "Apple_Vol.",    "Tesla_Vol.",    "Microsoft_Vol.",    "Silver_Vol.",    "Google_Vol.",    "Nvidia_Vol.",    "Berkshire_Vol.",
    "Netflix_Vol.",    "Amazon_Vol.",    "Meta_Vol.",    "Gold_Vol.",
    "Bitcoin_Vol.","Ethereum_Vol."
]#"Bitcoin_Vol.","Ethereum_Vol.",

# Function to clip outliers based on box plot (IQR) logic
def clip_outliers_iqr(series):
    Q1 = series.quantile(0.25)
    Q3 = series.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return series.clip(lower=lower_bound, upper=upper_bound)

# Columns to clip
clip_columns = ["Bitcoin_Vol.", "Ethereum_Vol."]

# Apply the IQR clipping only to those columns
df[clip_columns] = df[clip_columns].apply(clip_outliers_iqr)

# Melt the dataframe to long format
df_melted = df.melt(id_vars='Date', value_vars=volume_columns,
                    var_name='Asset', value_name='Volume')

# Create a single interactive line plot
fig = px.line(df_melted, x='Date', y='Volume', color='Asset',
              title='Clipped Volume Over Time (Outliers Reduced for Bitcoin & Ethereum)')

fig.update_layout(hovermode='x unified')
fig.show()

In [19]:
price_columns = [   "Natural_Gas_Price", "Crude_oil_Price",    "Copper_Price",            "Ethereum_Price",    "S&P_500_Price",
    "Nasdaq_100_Price",    "Apple_Price",    "Tesla_Price",    "Microsoft_Price",    "Silver_Price",    "Google_Price",    "Nvidia_Price",
    "Berkshire_Price",    "Netflix_Price",    "Amazon_Price",    "Meta_Price",    "Gold_Price",
                     "Platinum_Price","Bitcoin_Price",
]#"Platinum_Price","Bitcoin_Price",


df_price_melted = df.melt(id_vars='Date', value_vars=price_columns,
                          var_name='Asset', value_name='Price')

fig = px.line(df_price_melted, x='Date', y='Price', color='Asset',
              title='Raw Prices Over Time')

fig.update_layout(hovermode='x unified')
fig.show()


In [20]:
new_price_columns = []

for col in price_columns:
    return_col = col.replace("_Price", "_return")
    # Replace the price data with log returns
    df[col] = np.log(df[col] / df[col].shift(1))
    # Rename the column
    df.rename(columns={col: return_col}, inplace=True)
    # Keep track of new names if needed later
    new_price_columns.append(return_col)



# Melt the return columns for plotting
df_return_melted = df.melt(id_vars='Date', value_vars=new_price_columns,
                           var_name='Asset', value_name='Log_Return')

fig = px.line(df_return_melted, x='Date', y='Log_Return', color='Asset',
              title='Log Returns Over Time')

fig.update_layout(hovermode='x unified')
fig.show()



invalid value encountered in log



In [21]:

def create_train_val_test_split(df, test_size=0.2, val_size=0.2):
    """Create train/validation/test split indices preserving time order"""
    total_samples = len(df)
    test_split_idx = int(total_samples * (1 - test_size))
    val_split_idx = int(test_split_idx * (1 - val_size / (1 - test_size)))

    train_indices = list(range(val_split_idx))
    val_indices = list(range(val_split_idx, test_split_idx))
    test_indices = list(range(test_split_idx, total_samples))


    return train_indices, val_indices, test_indices

# Create splits for detrended data
train_idx, val_idx, test_idx = create_train_val_test_split(df)
train_df = df.iloc[train_idx].copy()
val_df = df.iloc[val_idx].copy()
test_df = df.iloc[test_idx].copy()

print(f"Train samples: {len(train_idx)} ({len(train_idx)/len(df)*100:.1f}%)")
print(f"Validation samples: {len(val_idx)} ({len(val_idx)/len(df)*100:.1f}%)")
print(f"Test samples: {len(test_idx)} ({len(test_idx)/len(df)*100:.1f}%)")

Train samples: 745 (59.9%)
Validation samples: 249 (20.0%)
Test samples: 249 (20.0%)


In [24]:
from sklearn.preprocessing import StandardScaler

# Columns to scale (exclude 'Date')
features_to_scale = df.columns.drop("Date")

# Initialize the scaler and fit on train
scaler = StandardScaler()
scaler.fit(train_df[features_to_scale])

# Apply transform and reattach Date column
train_scaled = train_df[["Date"]].copy()
train_scaled[features_to_scale] = scaler.transform(train_df[features_to_scale])

val_scaled = val_df[["Date"]].copy()
val_scaled[features_to_scale] = scaler.transform(val_df[features_to_scale])

test_scaled = test_df[["Date"]].copy()
test_scaled[features_to_scale] = scaler.transform(test_df[features_to_scale])


In [26]:
train_scaled

df_return_melted = train_scaled.melt(id_vars='Date', value_vars=new_price_columns,
                           var_name='Asset', value_name='Log_Return')

fig = px.line(df_return_melted, x='Date', y='Log_Return', color='Asset',
              title='Log Returns Over Time')

fig.update_layout(hovermode='x unified')
fig.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Example monthly performance data as a dictionary
data = {
    'Month': ['2023-02', '2023-03', '2023-04', '2023-05', '2023-06', 
              '2023-07', '2023-08', '2023-09', '2023-10', '2023-11', '2023-12', '2024-01'],
    'Buy_and_Hold': [200000, 201799.46, 205592.28, 207753.62, 214283.01, 227671.77, 222703.89, 221992.76, 217988.60, 224719.86, 229511.60, 237445.67],
    'Model_Strategy': [200000, 204289.08, 206641.03, 208813.39, 215804.89, 228321.13, 225003.62, 222710.77, 218693.65, 226534.12, 230875.12, 238283.64],
    'Random_Strategy': [200000, 197625.88, 194232.14, 194470.88, 198447.58, 205427.09, 203537.67, 199148.41, 196826.07, 203126.68, 203133.38, 210523.20]
}

# Create a DataFrame
df_monthly = pd.DataFrame(data)

# Convert Month to datetime for proper plotting
df_monthly['Month'] = pd.to_datetime(df_monthly['Month'], format='%Y-%m')

# Plotting the monthly portfolio performance
plt.figure(figsize=(12, 6))
plt.plot(df_monthly['Month'], df_monthly['Buy_and_Hold'], marker='o', label='Buy and Hold')
plt.plot(df_monthly['Month'], df_monthly['Model_Strategy'], marker='s', label='Model-based Strategy')
plt.plot(df_monthly['Month'], df_monthly['Random_Strategy'], marker='^', label='Random Strategy')

plt.title('Monthly Portfolio Performance Comparison', fontsize=16)
plt.xlabel('Month', fontsize=14)
plt.ylabel('Portfolio Value ($)', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()

# Save the plot to a file
#plt.savefig('monthly_portfolio_performance.png')

# Display the plot
plt.show()
