In [None]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import numpy as np
import pandas as pd
import itertools
import time
import csv

In [None]:
# Locations of the preprocessed CSV files for the 30-minute, 4-hour and daily data.
# NOTE: the paths are blank here and must be filled in before this cell can run.
path_30min = ''
path_4h = ''
path_1d = ''

# Read the three files in the order 30min, 4h, daily
df_30min, df_4h, df_daily = (
    pd.read_csv(csv_path) for csv_path in (path_30min, path_4h, path_1d)
)

all_dfs = [df_30min, df_4h, df_daily]

# Parse both timestamp columns of every frame into datetime values
for df in all_dfs:
    for time_col in ('open_time', 'close_time'):
        df[time_col] = pd.to_datetime(df[time_col])

# Column groups: raw price/volume columns and their differenced counterparts
price_features = [
    'close', 'high', 'low', 'volume',
    'quote_vol', 'count', 'buy_base', 'buy_quote',
]
diff_features = ['close_diff', 'high_diff', 'low_diff']

In [None]:
def evaluate_model(y_true, y_pred):
    """Compute regression error metrics and directional accuracy for a forecast.

    Parameters
    ----------
    y_true : array-like
        Observed values, in time order.
    y_pred : array-like
        Predicted values, aligned position-by-position with ``y_true``.

    Returns
    -------
    tuple
        ``(mse, mae, rmse, r2, mape, directional_acc)``.

        * ``mape`` is scikit-learn's ``mean_absolute_percentage_error``
          multiplied by 100 (i.e. expressed in percent).
          NOTE(review): for targets that are close to zero (such as
          differenced series) this value can become extremely large.
        * ``directional_acc`` is the fraction of consecutive steps for which
          the sign of the change in ``y_true`` equals the sign of the change
          in ``y_pred``. It is ``nan`` when fewer than two observations are
          supplied, because no step-to-step change exists.
    """
    # Coerce to plain arrays so the step differences below are positional.
    # With pandas Series, ``s[1:] - s[:-1]`` aligns on index labels instead of
    # shifting, which yields zeros/NaN rather than consecutive differences.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100

    # Direction of movement between consecutive observations (-1, 0 or +1)
    true_direction = np.sign(np.diff(y_true, axis=0))
    pred_direction = np.sign(np.diff(y_pred, axis=0))
    if true_direction.size == 0:
        # Fewer than two points: the metric is undefined
        directional_acc = np.nan
    else:
        directional_acc = np.mean(true_direction == pred_direction)

    return mse, mae, rmse, r2, mape, directional_acc

def save_predictions(y_train, y_train_pred, y_test, y_test_pred, model_counter, file_prefix):
    """Write actual-vs-predicted tables for the train and test splits to CSV.

    Two files are created, one per split. Each contains an ``Actual`` column
    and a ``Model_<model_counter>`` column and is written without the index.

    Parameters
    ----------
    y_train, y_train_pred : array-like
        Actual and predicted values for the training split (equal length).
    y_test, y_test_pred : array-like
        Actual and predicted values for the test split (equal length).
    model_counter : int
        Model number used in the prediction column name and in the file names.
    file_prefix : str
        Prefix (may include a directory) prepended to both output file names.
    """
    prediction_column = f"Model_{model_counter}"
    outputs = [
        (f"{file_prefix}_train_predictions_model_{model_counter}.csv", y_train, y_train_pred),
        (f"{file_prefix}_test_predictions_model_{model_counter}.csv", y_test, y_test_pred),
    ]

    # Build both tables before touching the disk, then write them out
    tables = [
        (out_path, pd.DataFrame({"Actual": actual, prediction_column: predicted}))
        for out_path, actual, predicted in outputs
    ]
    for out_path, table in tables:
        table.to_csv(out_path, index=False)

def train_and_evaluate_arima(df, target_column, params, model_counter, file_prefix='results', train_fraction=0.8):
    """Fit one ARIMA(p, d, q) model on a chronological split and record its metrics.

    The first ``train_fraction`` of ``df[target_column]`` is used for fitting
    and the remainder for testing. Train metrics are computed from the model's
    fitted values; test metrics from a single multi-step forecast covering the
    whole test segment (test observations are not fed back into the model).

    Side effects: appends one metrics row to
    ``{file_prefix}_{target_column}.csv`` (writing the header first if the file
    is empty), writes the per-model prediction CSVs via ``save_predictions``,
    and prints a short summary.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``target_column``, ordered in time.
    target_column : str
        Name of the column to model.
    params : dict
        ARIMA order with keys ``'p'``, ``'d'`` and ``'q'``.
    model_counter : int
        Model number, used to name the prediction files.
    file_prefix : str, default 'results'
        Prefix for all output files.
    train_fraction : float, default 0.8
        Share of the series (from the start) used for training.

    Returns
    -------
    dict
        The metrics row that was appended to the results CSV.

    Raises
    ------
    ValueError
        If the split would leave the training or the test segment empty.
    """
    p, d, q = params['p'], params['d'], params['q']

    series = df[target_column].values
    train_size = int(len(series) * train_fraction)
    if train_size <= 0 or train_size >= len(series):
        # Fail early with a clear message instead of an obscure error from the model
        raise ValueError(
            f"train_fraction={train_fraction} splits {len(series)} rows into an empty "
            "training or test segment"
        )
    train, test = series[:train_size], series[train_size:]

    # perf_counter is monotonic, so the measured duration is not affected by clock changes
    start_time = time.perf_counter()
    model = ARIMA(train, order=(p, d, q))
    model_fit = model.fit()
    training_time = time.perf_counter() - start_time

    # Out-of-sample forecast for the whole test segment and in-sample fitted values
    predictions = model_fit.forecast(steps=len(test))
    train_predictions = model_fit.fittedvalues

    # Ensure that all predictions are aligned properly: if fewer fitted values
    # than training points come back, compare against the trailing part of train
    if len(train) != len(train_predictions):
        train = train[-len(train_predictions):]

    train_mse, train_mae, train_rmse, train_r2, train_mape, train_directional_acc = evaluate_model(train, train_predictions)
    test_mse, test_mae, test_rmse, test_r2, test_mape, test_directional_acc = evaluate_model(test, predictions)

    result = {
        "p": p,
        "d": d,
        "q": q,
        "train_mse": train_mse,
        "test_mse": test_mse,
        "train_mae": train_mae,
        "test_mae": test_mae,
        "train_rmse": train_rmse,
        "test_rmse": test_rmse,
        "train_r2": train_r2,
        "test_r2": test_r2,
        "train_mape": train_mape,
        "test_mape": test_mape,
        "train_directional_acc": train_directional_acc,
        "test_directional_acc": test_directional_acc,
        "training_time": training_time
    }

    # Append to the shared results file; the header is written only when the file is empty
    with open(f'{file_prefix}_{target_column}.csv', 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(result))
        if f.tell() == 0:
            writer.writeheader()
        writer.writerow(result)

    save_predictions(train, train_predictions, test, predictions, model_counter, file_prefix)

    print(f"Results: Train MAE: {train_mae:.4f}, Test MAE: {test_mae:.4f}")
    print(f"Train Directional Accuracy: {train_directional_acc:.4f}, Test Directional Accuracy: {test_directional_acc:.4f}")
    print(f"Training Time: {training_time:.4f} seconds")

    return result

In [None]:
# Series to model and prefix for every file produced by the 30-minute search
target_column = 'close_diff'
file_prefix = 'ARIMA_30min1'

# ARIMA orders to try: p and q from 0 to 5, d from 0 to 2
parameter_space = {
    'p': list(range(6)),
    'd': list(range(3)),
    'q': list(range(6)),
}

# Exhaustive grid search: fit and score every (p, d, q) combination in turn
start = time.time()
param_grid = list(itertools.product(*(parameter_space[key] for key in ('p', 'd', 'q'))))
total_models = len(param_grid)
model_counter = 1

for p, d, q in param_grid:
    param_dict = dict(p=p, d=d, q=q)
    print(f"Running model {model_counter}/{total_models} with parameters: {param_dict}\n")
    train_and_evaluate_arima(df_30min, target_column, param_dict, model_counter, file_prefix)
    model_counter += 1

# Report the wall-clock duration of the whole search
end = time.time()
total_time = end - start
print(f"Total time taken: {total_time} seconds")


In [None]:
# Prefix for every file produced by the 4-hour search
# (target_column is reused from the cell that defined it earlier)
file_prefix = 'ARIMA_4h2'

# ARIMA orders to try: p and q from 0 to 5, d from 0 to 2
parameter_space = {
    'p': list(range(6)),
    'd': list(range(3)),
    'q': list(range(6)),
}

# Exhaustive grid search: fit and score every (p, d, q) combination in turn
start = time.time()
param_grid = list(itertools.product(*(parameter_space[key] for key in ('p', 'd', 'q'))))
total_models = len(param_grid)
model_counter = 1

for p, d, q in param_grid:
    param_dict = dict(p=p, d=d, q=q)
    print(f"Running model {model_counter}/{total_models} with parameters: {param_dict}\n")
    train_and_evaluate_arima(df_4h, target_column, param_dict, model_counter, file_prefix)
    model_counter += 1

# Report the wall-clock duration of the whole search
end = time.time()
total_time = end - start
print(f"Total time taken: {total_time} seconds")

In [None]:
# ARIMA orders to try on the daily data: p and q from 0 to 3, d from 0 to 2
parameter_space = {
    'p': list(range(4)),
    'd': list(range(3)),
    'q': list(range(4)),
}

# Exhaustive grid search: fit and score every (p, d, q) combination in turn
# (target_column is reused from the cell that defined it earlier)
file_prefix = 'ARIMA_test2'
start = time.time()
param_grid = list(itertools.product(*(parameter_space[key] for key in ('p', 'd', 'q'))))
total_models = len(param_grid)
model_counter = 1

for p, d, q in param_grid:
    param_dict = dict(p=p, d=d, q=q)
    print(f"Running model {model_counter}/{total_models} with parameters: {param_dict}\n")
    train_and_evaluate_arima(df_daily, target_column, param_dict, model_counter, file_prefix)
    model_counter += 1

# Report the wall-clock duration of the whole search
end = time.time()
total_time = end - start
print(f"Total time taken: {total_time} seconds")