In [1]:
import pandas as pd
import requests
import datetime

In [3]:
class BinanceIngestionData:
    """Fetch kline (candlestick) rows from the Binance REST API and tabulate them.

    Typical use is ``fetch_data()`` -> ``process_data(raw)`` -> ``save_to_csv(df, path)``.

    Args:
        symbol: Trading pair sent as the ``symbol`` query parameter (e.g. "ETHUSDT").
        interval: Candle width sent as the ``interval`` query parameter (e.g. "1d").
        start_date: First day of the window, formatted ``YYYY-MM-DD``.
        end_date: Last day of the window, formatted ``YYYY-MM-DD``.
    """

    # Labels applied, in order, to the twelve fields of each kline row.
    KLINE_COLUMNS = [
        "Open Time", "Open", "High", "Low", "Close", "Volume", "Close Time",
        "Quote Asset Volume", "Number of Trades", "Taker Buy Base Asset Volume",
        "Taker Buy Quote Asset Volume", "Ignore"
    ]

    def __init__(self, symbol, interval, start_date, end_date):
        self.symbol = symbol
        self.interval = interval
        self.start_date = start_date
        self.end_date = end_date
        self.base_url = "https://api.binance.com/api/v3/klines"

    @staticmethod
    def _to_millis(date_str):
        """Return epoch milliseconds for midnight UTC of a ``YYYY-MM-DD`` string.

        The parsed value is pinned to UTC explicitly. A naive ``datetime.timestamp()``
        would use the machine's local timezone, shifting the requested window by the
        local UTC offset, while process_data() decodes the returned times as UTC.
        """
        parsed = datetime.datetime.strptime(date_str, "%Y-%m-%d")
        return int(parsed.replace(tzinfo=datetime.timezone.utc).timestamp() * 1000)

    def fetch_data(self, timeout=30):
        """Request the klines for the configured symbol, interval and date window.

        A single request is made with ``limit=1000``, so no more than that many rows
        are asked for; windows needing more candles are not paginated here.

        Args:
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            The decoded JSON payload (a list of kline rows).

        Raises:
            Exception: If the response status is not 200 or the payload is empty.
        """
        params = {
            "symbol": self.symbol,
            "interval": self.interval,
            "startTime": self._to_millis(self.start_date),
            "endTime": self._to_millis(self.end_date),
            "limit": 1000
        }
        response = requests.get(self.base_url, params=params, timeout=timeout)

        # Check the status before decoding: an error response may not be valid JSON,
        # and decoding it first would hide the real failure behind a parse error.
        if response.status_code != 200:
            raise Exception("Failed to fetch data. Check your symbol, interval, or date range.")

        data = response.json()
        if not data:
            raise Exception("Failed to fetch data. Check your symbol, interval, or date range.")

        return data

    def process_data(self, data):
        """Turn raw kline rows into a typed DataFrame indexed by open time.

        Args:
            data: Iterable of twelve-field kline rows as returned by fetch_data().

        Returns:
            DataFrame indexed by "Open Time" (datetime decoded from milliseconds) with
            price/volume columns cast to float and "Number of Trades" cast to int.
            "Close Time" and "Ignore" are left exactly as received.
        """
        df = pd.DataFrame(data, columns=self.KLINE_COLUMNS)

        # Convert to proper data types
        df["Open Time"] = pd.to_datetime(df["Open Time"], unit="ms")
        df = df.set_index("Open Time")
        df = df.astype({
            "Open": "float",
            "High": "float",
            "Low": "float",
            "Close": "float",
            "Volume": "float",
            "Quote Asset Volume": "float",
            "Number of Trades": "int",
            "Taker Buy Base Asset Volume": "float",
            "Taker Buy Quote Asset Volume": "float"
        })

        return df

    def save_to_csv(self, df, file_path):
        """Write ``df`` (index included) to ``file_path`` as CSV and print a confirmation."""
        df.to_csv(file_path)
        print(f"Data saved to {file_path}")

if __name__ == "__main__":
    # Download about two years (730 days) of daily ETH/USDT candles ending today
    # and store them as a CSV next to the notebook.
    symbol = "ETHUSDT"
    interval = "1d"

    # Sample the clock once so both ends of the window come from the same instant;
    # two separate now() calls could land on different dates around midnight.
    now = datetime.datetime.now()
    end_date = now.strftime("%Y-%m-%d")
    start_date = (now - datetime.timedelta(days=730)).strftime("%Y-%m-%d")

    binance_data = BinanceIngestionData(symbol, interval, start_date, end_date)

    raw_data = binance_data.fetch_data()
    processed_data = binance_data.process_data(raw_data)
    binance_data.save_to_csv(processed_data, "ETH_2Y_Binance.csv")


Data saved to ETH_2Y_Binance.csv


In [2]:
# Read back the candles written by the ingestion step. The path is relative to the
# notebook's working directory, matching where save_to_csv() was pointed, so the
# notebook does not depend on one user's absolute directory layout.
df = pd.read_csv("ETH_2Y_Binance.csv")
df.head()

Unnamed: 0,Open Time,Open,High,Low,Close,Volume,Close Time,Quote Asset Volume,Number of Trades,Taker Buy Base Asset Volume,Taker Buy Quote Asset Volume,Ignore
0,2023-01-15,1549.91,1566.66,1516.03,1552.52,387832.7138,1673827199999,596626800.0,590180,192499.6889,296178100.0,0
1,2023-01-16,1552.52,1604.5,1521.31,1576.94,574622.8953,1673913599999,897953800.0,802443,283353.4366,442958600.0,0
2,2023-01-17,1576.94,1609.0,1541.15,1565.57,506771.7219,1673999999999,797552600.0,703325,253800.4618,399512200.0,0
3,2023-01-18,1565.56,1610.0,1501.39,1511.43,752534.8857,1674086399999,1172243000.0,913587,379449.3465,591240900.0,0
4,2023-01-19,1511.44,1564.43,1509.15,1551.09,308679.4645,1674172799999,473220800.0,477203,157860.3364,242038700.0,0


In [3]:
class FeatureEngineering:
    """Derive simple price and volume features from a candle DataFrame.

    Args:
        df: Frame holding at least the columns "Open", "High", "Low", "Close",
            "Volume" and "Quote Asset Volume".
    """

    def __init__(self, df):
        self.df = df

    def add_features(self):
        """Rename the date/target columns and append four derived columns.

        "Open Time" becomes "ds" and "Close" becomes "y"; an "Ignore" column is
        dropped when present. The result replaces ``self.df`` and is returned.

        Returns:
            The new DataFrame with "High_Low_Diff", "Open_Close_Diff",
            "Average_Price" and "Volume_Weighted_Price" appended, in that order.
        """
        renamed = self.df.rename(columns={"Open Time": "ds", "Close": "y"})
        trimmed = renamed.drop(columns=["Ignore"], errors='ignore')

        self.df = trimmed.assign(
            # Daily range and open-to-close move.
            High_Low_Diff=lambda frame: frame['High'] - frame['Low'],
            Open_Close_Diff=lambda frame: frame['Open'] - frame['y'],
            # Mean of high, low and close.
            Average_Price=lambda frame: (frame['High'] + frame['Low'] + frame['y']) / 3,
            # Quote volume divided by base volume.
            Volume_Weighted_Price=lambda frame: frame['Quote Asset Volume'] / frame['Volume'],
        )

        return self.df

In [4]:
# Derive the engineered columns from the loaded candles, then preview the result.
feature_engineering = FeatureEngineering(df=df)
engineered_data = feature_engineering.add_features()
engineered_data.head()

Unnamed: 0,ds,Open,High,Low,y,Volume,Close Time,Quote Asset Volume,Number of Trades,Taker Buy Base Asset Volume,Taker Buy Quote Asset Volume,High_Low_Diff,Open_Close_Diff,Average_Price,Volume_Weighted_Price
0,2023-01-15,1549.91,1566.66,1516.03,1552.52,387832.7138,1673827199999,596626800.0,590180,192499.6889,296178100.0,50.63,-2.61,1545.07,1538.36108
1,2023-01-16,1552.52,1604.5,1521.31,1576.94,574622.8953,1673913599999,897953800.0,802443,283353.4366,442958600.0,83.19,-24.42,1567.583333,1562.683562
2,2023-01-17,1576.94,1609.0,1541.15,1565.57,506771.7219,1673999999999,797552600.0,703325,253800.4618,399512200.0,67.85,11.37,1571.906667,1573.790695
3,2023-01-18,1565.56,1610.0,1501.39,1511.43,752534.8857,1674086399999,1172243000.0,913587,379449.3465,591240900.0,108.61,54.13,1540.94,1557.726014
4,2023-01-19,1511.44,1564.43,1509.15,1551.09,308679.4645,1674172799999,473220800.0,477203,157860.3364,242038700.0,55.28,-39.65,1541.556667,1533.049216


In [5]:
from prophet import Prophet

def Prophet_Features(df):
    """Fit Prophet on ``df`` and return its in-sample prediction frame.

    Args:
        df: Frame with a date column "ds" and a target column "y".

    Returns:
        The DataFrame produced by ``Prophet.predict`` for the same rows the model
        was fitted on (with "y" removed before predicting).
    """
    # Work on a private frame. Resetting the caller's index in place left a stray
    # 'index' column in their DataFrame after every call.
    history = df.reset_index(drop=True)
    # Store the parsed dates; previously the conversion result was discarded.
    history["ds"] = pd.to_datetime(history["ds"])

    prophet_model = Prophet(
                        growth='linear',
                        seasonality_mode='additive',
                        interval_width=0.95,
                        daily_seasonality=True,
                        weekly_seasonality=True,
                        yearly_seasonality=False
                    )
    prophet_model.fit(history)
    prophet_results = prophet_model.predict(history.drop(columns='y'))

    return prophet_results

In [6]:
# Fit Prophet on the engineered frame and keep its in-sample predictions.
prophet_data = Prophet_Features(df=engineered_data)

INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


In [7]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

def Triple_ETS(df, seasonal_periods=24*7):
    """Add multiplicative and additive Holt-Winters fitted values to ``df``.

    Two ExponentialSmoothing models are fitted on ``df['y']`` and their fitted
    values stored as "Triple_Multiplicative_ETS" and "Triple_Additive_ETS".
    ``df`` is modified in place and also returned; an 'index' column, if present,
    is dropped.

    Args:
        df: Frame with the target series in column "y".
        seasonal_periods: Number of observations per seasonal cycle. The default
            (24*7 = 168) is kept for backward compatibility.
            NOTE(review): 168 looks like a weekly cycle for hourly data; for daily
            candles a weekly cycle would be 7 - confirm the intended value.

    Returns:
        The same ``df`` object with the two fitted-value columns added.
    """
    # Same component type is used for trend and seasonality in each model;
    # the multiplicative column is created first, as before.
    variants = (
        ('Triple_Multiplicative_ETS', 'mul'),
        ('Triple_Additive_ETS', 'add'),
    )
    for column, component in variants:
        df[column] = ExponentialSmoothing(
            df['y'], trend=component, seasonal=component, seasonal_periods=seasonal_periods
        ).fit().fittedvalues

    # Remove a leftover 'index' column if an earlier reset_index() created one.
    if 'index' in df.columns:
        df.drop(columns=['index'], inplace=True)

    return df


In [8]:
# Append the Holt-Winters fitted-value columns to the engineered frame.
data = Triple_ETS(df=engineered_data)

  return err.T @ err


In [9]:
def Merging_DFs(df1, df2):
    """Left-join ``df2`` onto ``df1`` using the string form of their "ds" columns.

    Both "ds" columns are cast to ``str`` so that differing dtypes can still be
    matched. The cast is applied to copies; the frames passed in are left
    untouched (previously their "ds" columns were overwritten in place).

    Args:
        df1: Left frame; every one of its rows is kept.
        df2: Right frame whose columns are attached where "ds" matches.

    Returns:
        The merged DataFrame, with "ds" as strings.
    """
    left = df1.assign(ds=df1['ds'].astype(str))
    right = df2.assign(ds=df2['ds'].astype(str))
    featured_df = pd.merge(left, right, how='left', on='ds')
    return featured_df

In [10]:
# Attach the Prophet prediction columns to the ETS-augmented frame by date.
final_df = Merging_DFs(df1=data, df2=prophet_data)
final_df

Unnamed: 0,ds,Open,High,Low,y,Volume,Close Time,Quote Asset Volume,Number of Trades,Taker Buy Base Asset Volume,...,daily,daily_lower,daily_upper,weekly,weekly_lower,weekly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2023-01-15,1549.91,1566.66,1516.03,1552.52,387832.7138,1673827199999,5.966268e+08,590180,192499.6889,...,64.708527,64.708527,64.708527,1.749952,1.749952,1.749952,0.0,0.0,0.0,1620.240474
1,2023-01-16,1552.52,1604.50,1521.31,1576.94,574622.8953,1673913599999,8.979538e+08,802443,283353.4366,...,64.708527,64.708527,64.708527,11.828936,11.828936,11.828936,0.0,0.0,0.0,1632.186617
2,2023-01-17,1576.94,1609.00,1541.15,1565.57,506771.7219,1673999999999,7.975526e+08,703325,253800.4618,...,64.708527,64.708527,64.708527,-5.954763,-5.954763,-5.954763,0.0,0.0,0.0,1616.270077
3,2023-01-18,1565.56,1610.00,1501.39,1511.43,752534.8857,1674086399999,1.172243e+09,913587,379449.3465,...,64.708527,64.708527,64.708527,0.727126,0.727126,0.727126,0.0,0.0,0.0,1624.819126
4,2023-01-19,1511.44,1564.43,1509.15,1551.09,308679.4645,1674172799999,4.732208e+08,477203,157860.3364,...,64.708527,64.708527,64.708527,-4.482332,-4.482332,-4.482332,0.0,0.0,0.0,1621.476827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2025-01-09,3327.29,3357.27,3158.00,3219.20,501818.4247,1736467199999,1.638959e+09,3685889,237793.7346,...,64.708527,64.708527,64.708527,-4.482332,-4.482332,-4.482332,0.0,0.0,0.0,3442.695279
726,2025-01-10,3219.20,3322.49,3193.97,3267.04,454142.1333,1736553599999,1.482941e+09,3199118,222519.1849,...,64.708527,64.708527,64.708527,-2.505943,-2.505943,-2.505943,0.0,0.0,0.0,3450.698652
727,2025-01-11,3267.05,3320.18,3217.56,3282.83,151679.7487,1736639999999,4.949333e+08,1208777,74202.7876,...,64.708527,64.708527,64.708527,-1.362977,-1.362977,-1.362977,0.0,0.0,0.0,3457.868601
728,2025-01-12,3282.83,3300.00,3224.49,3267.30,164879.9709,1736726399999,5.387517e+08,1151908,78312.5332,...,64.708527,64.708527,64.708527,1.749952,1.749952,1.749952,0.0,0.0,0.0,3467.008513


In [16]:
import xgboost as xgb
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from datetime import timedelta

class XGBoostForecasting:
    """Train an XGBoost regressor on calendar features and forecast future dates.

    The model's only inputs are the day-of-month, month and weekday derived from
    the date column; any other columns in ``data`` are not used as features.

    Expected call order: ``preprocess_data()`` -> ``train_model()`` ->
    ``forecast()`` / ``evaluate_model()`` -> ``plot_forecast()``.

    Args:
        data: DataFrame containing the date and target columns.
        date_column: Name of the date column (renamed to "ds" by preprocess_data).
        target_column: Name of the target column (renamed to "y" by preprocess_data).
    """

    # Calendar columns created by preprocess_data() and fed to the model.
    FEATURE_COLUMNS = ['day', 'month', 'weekday']

    def __init__(self, data, date_column, target_column):
        self.data = data
        self.date_column = date_column
        self.target_column = target_column
        # Populated by train_model(); checked before any prediction is attempted.
        self.model = None

    def _require_model(self):
        """Raise a clear error when a prediction is requested before training."""
        if self.model is None:
            raise RuntimeError("Model is not trained. Call train_model() first.")

    def preprocess_data(self):
        """Rename the date/target columns to "ds"/"y" and add calendar features.

        The result replaces ``self.data``; the frame passed to the constructor is
        not modified because ``rename`` returns a new DataFrame.
        """
        frame = self.data.rename(columns={self.date_column: 'ds', self.target_column: 'y'})
        frame['ds'] = pd.to_datetime(frame['ds'])

        # Add time-based features (e.g., day, month, weekday)
        frame['day'] = frame['ds'].dt.day
        frame['month'] = frame['ds'].dt.month
        frame['weekday'] = frame['ds'].dt.weekday

        self.data = frame

    def prepare_features(self, training_period=730):
        """Select the most recent ``training_period`` days and split X / y.

        Args:
            training_period: Window length in days, counted back from the latest
                "ds" value (the boundary date itself is included).

        Returns:
            Tuple ``(features, target, training_data)``: the calendar feature
            columns, the "y" series, and the full filtered frame.
        """
        last_date = self.data['ds'].max()
        start_date = last_date - timedelta(days=training_period)
        training_data = self.data[self.data['ds'] >= start_date]

        features = training_data[self.FEATURE_COLUMNS]
        target = training_data['y']

        return features, target, training_data

    def train_model(self, training_period=730):
        """Fit the regressor on every row inside the training window.

        Args:
            training_period: Window length in days passed to prepare_features().
        """
        features, target, _ = self.prepare_features(training_period)

        self.model = xgb.XGBRegressor(objective='reg:squarederror',
                                      n_estimators=1000,
                                      learning_rate=0.01,  # Lower learning rate for small changes
                                      max_depth=6,  # Adjust tree depth
                                      subsample=0.8,  # Use a fraction of samples
                                      colsample_bytree=0.8  # Use a fraction of features
                                      )
        self.model.fit(features, target)

    def forecast(self, future_periods=180, adjustment_factor=None):
        """Predict the target for the ``future_periods`` days after the last date.

        Args:
            future_periods: Number of daily steps to forecast.
            adjustment_factor: Constant added to every raw model prediction. When
                ``None`` (the default) half of the mean of "y" is added, which is
                the offset this method has always applied. Pass ``0`` to get the
                unadjusted model output.
                NOTE(review): the default offset has no stated justification in
                this code - confirm it is intended before relying on the numbers.

        Returns:
            DataFrame with columns "ds" (forecast dates) and "yhat" (predictions).

        Raises:
            RuntimeError: If train_model() has not been called yet.
        """
        self._require_model()

        last_date = self.data['ds'].max()
        forecast_dates = [last_date + timedelta(days=i) for i in range(1, future_periods + 1)]

        # Same three calendar features, in the same order, as used for training.
        future_features = pd.DataFrame({
            'day': [d.day for d in forecast_dates],
            'month': [d.month for d in forecast_dates],
            'weekday': [d.weekday() for d in forecast_dates]
        })

        forecast_values = self.model.predict(future_features)

        if adjustment_factor is None:
            adjustment_factor = self.data['y'].mean() / 2

        forecast_values += adjustment_factor

        forecast = pd.DataFrame({
            'ds': forecast_dates,
            'yhat': forecast_values
        })

        return forecast

    def evaluate_model(self, training_period=730):
        """Score the model on the rows selected by prepare_features().

        No hold-out split is made: with the same ``training_period`` these are
        the rows train_model() fitted on, so the metrics are in-sample and do not
        include the offset that forecast() adds.

        Args:
            training_period: Window length in days passed to prepare_features().

        Returns:
            Dict with keys "MAE", "MSE", "RMSE" and "MAPE".

        Raises:
            RuntimeError: If train_model() has not been called yet.
        """
        self._require_model()

        # In-sample evaluation: features/target come from the training window.
        features, target, _ = self.prepare_features(training_period)
        y_pred = self.model.predict(features)

        # Calculate evaluation metrics
        mae = mean_absolute_error(target, y_pred)
        mse = mean_squared_error(target, y_pred)
        rmse = np.sqrt(mse)
        mape = mean_absolute_percentage_error(target, y_pred)

        return {'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'MAPE': mape}

    def plot_forecast(self, forecast, title='XGBoost Forecast for the Next 6 Months'):
        """Plot the historical target and the forecast on one Plotly figure.

        Args:
            forecast: DataFrame with "ds" and "yhat" columns, as returned by forecast().
            title: Figure title; the default matches a 180-day forecast.
        """
        fig = go.Figure()

        # Plot actual data
        fig.add_trace(go.Scatter(x=self.data['ds'], y=self.data['y'], mode='lines', name='Actual', line=dict(color='blue')))

        # Plot forecast
        fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines', name='Forecast', line=dict(color='green')))

        # Layout and styling
        fig.update_layout(
            title=title,
            xaxis_title='Date',
            yaxis_title='Price',
            showlegend=True,
            template='plotly_dark'
        )
        fig.show()


if __name__ == "__main__":
    # final_df (the merged frame) supplies the 'ds' dates and the 'y' target.
    xgboost_forecasting = XGBoostForecasting(data=final_df, date_column='ds', target_column='y')
    xgboost_forecasting.preprocess_data()
    xgboost_forecasting.train_model(training_period=730)

    # 180 daily steps ahead, i.e. roughly six months.
    forecast = xgboost_forecasting.forecast(future_periods=180)
    evaluation_metrics = xgboost_forecasting.evaluate_model(training_period=730)

    # Show actual vs. forecast.
    xgboost_forecasting.plot_forecast(forecast)

    # Report each metric on its own line, in a fixed order.
    print("Evaluation Metrics:")
    for metric_name in ("MAE", "MSE", "RMSE", "MAPE"):
        print(f"{metric_name}: {evaluation_metrics[metric_name]}")

Evaluation Metrics:
MAE: 603.2040892818922
MSE: 410488.5316404608
RMSE: 640.6937892944342
MAPE: 0.2615940546522215
