In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Assuming `df` is your final dataframe after feature engineering

# Parse the `date` column into pandas datetimes (written back in place)
df['date'] = df['date'].pipe(pd.to_datetime)

# Order the rows chronologically; `sort_values` returns a new frame
df = df.sort_values(by='date', ascending=True)

# --- Feature Engineering for Lag Features, Rolling Stats ---
#
# `df` contains more than one currency (see `currency_name`) and is ordered by
# date only, so rows of different coins are interleaved. Every lag / rolling
# feature is therefore computed *per currency*: a plain `shift` / `rolling`
# over the whole frame would pull another coin's prices into the window and
# a "7-row" lag would not correspond to 7 observations of the same coin.
price_columns = ['high', 'low', 'close', 'open', 'volume']
grouped = {
    column: df[column].groupby(df['currency_name'])
    for column in price_columns
}

# Lag Features: the column's value `lag` rows earlier within the same currency
lag_days = [7, 14, 30, 60]
for lag in lag_days:
    for column in price_columns:
        df[f'{column}_lag_{lag}'] = grouped[column].shift(lag)

# Rolling Statistics: trailing mean / std over `window` rows of one currency.
# `transform` returns a result aligned with the original row order, so the
# assignment does not depend on the index being unique.
# NOTE(review): each window includes the current row, so the `high_roll*` and
# `low_roll*` columns contain the same-row value of the columns that are later
# used as targets. Consider shifting by one row before rolling if that
# leakage is not intended.
rolling_windows = [7, 14, 30, 60]
for window in rolling_windows:
    for column in price_columns:
        df[f'{column}_rollmean_{window}'] = grouped[column].transform(
            lambda s, w=window: s.rolling(w).mean()
        )
    for column in price_columns:
        df[f'{column}_rollstd_{window}'] = grouped[column].transform(
            lambda s, w=window: s.rolling(w).std()
        )

# Calendar components derived from the parsed `date` column
date_parts = df['date'].dt
for part in ('year', 'month', 'day', 'weekday'):
    df[part] = getattr(date_parts, part)

# Numeric code for the currency; any name other than these two maps to NaN
currency_codes = {'cardano': 0, 'ripple': 1}
df['currency_name_encoded'] = df['currency_name'].map(currency_codes)

# Discard every row that still has a NaN in any column (e.g. the warm-up rows
# of the lag and rolling features)
df = df.dropna(axis=0, how='any')

# --- Prepare Features and Target Variables ---

# Features (X): every column except the raw date, the textual currency label
# and the two target columns
X = df.drop(columns=['date', 'currency_name', 'high', 'low'])

# Targets (y) - high and low prices
y_high = df['high']
y_low = df['low']

# Chronological hold-out: the first 80% of rows train, the last 20% test.
# `df` is sorted by date, so `shuffle=False` keeps the test period strictly
# after the training period. A shuffled split would scatter neighbouring days
# (whose lag/rolling features overlap almost entirely) across both sets and
# make the test scores look better than genuine out-of-sample performance.
# `random_state` is omitted because it has no effect without shuffling.
X_train, X_test, y_high_train, y_high_test, y_low_train, y_low_test = train_test_split(
    X, y_high, y_low, test_size=0.2, shuffle=False
)

# --- Linear Regression Model ---
# One ordinary-least-squares model per target, both trained on the same
# feature matrix. `fit` returns the estimator, so construction and training
# are chained.
lr_model_high = LinearRegression().fit(X_train, y_high_train)
lr_model_low = LinearRegression().fit(X_train, y_low_train)