In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

# Load the dataset
data = pd.read_csv('C:\\Users\\owd1\\OneDrive\\Desktop\\Bitcoin Hisroty\\bitcoin_data_15K.csv')

# Convert 'datetime' to datetime object and set as index
data['datetime'] = pd.to_datetime(data['datetime'])
data.set_index('datetime', inplace=True)

# Calculate the percentage change for price-related columns
data['open_change'] = data['open'].pct_change() * 100
data['high_change'] = data['high'].pct_change() * 100
data['low_change'] = data['low'].pct_change() * 100
data['close_change'] = data['close'].pct_change() * 100

# Calculate the percentage change for volume
data['volume_change'] = data['volume'].pct_change() * 100

# Drop the original price, volume columns and SMA columns
data.drop(['open', 'high', 'low', 'close', 'volume', 'SMA_12', 'SMA_26'], axis=1, inplace=True)

# Handle any NaN values that arise from percentage change calculations
data = data.dropna()

# Normalization of features
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)
data_scaled = pd.DataFrame(data_scaled, columns=data.columns, index=data.index)

# Splitting the data into training and testing sets
train_data, test_data = train_test_split(data_scaled, test_size=0.2, random_state=42, shuffle=False)

# Separating the target variable ('close_change') from features
X_train = train_data.drop('close_change', axis=1)
y_train = train_data['close_change']
X_test = test_data.drop('close_change', axis=1)
y_test = test_data['close_change']

# Training the RandomForestRegressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predicting on the test set
predictions = model.predict(X_test)

# Evaluating the model
mae = mean_absolute_error(y_test, predictions)
print(f'Mean Absolute Error: {mae}')

# Load the test dataset
data2 = pd.read_csv('C:\\Users\\owd1\\OneDrive\\Desktop\\Bitcoin Hisroty\\BTCUSDT_data_with_indicators2.csv')

# Convert 'datetime' to datetime object and set as index
data2['datetime'] = pd.to_datetime(data2['datetime'])
data2.set_index('datetime', inplace=True)

# Perform the same preprocessing on the new dataset
data2['open_change'] = data2['open'].pct_change() * 100
data2['high_change'] = data2['high'].pct_change() * 100
data2['low_change'] = data2['low'].pct_change() * 100
data2['close_change'] = data2['close'].pct_change() * 100
data2['volume_change'] = data2['volume'].pct_change() * 100

# Drop the original price, volume columns and SMA columns
data2.drop(['open', 'high', 'low', 'close', 'volume', 'SMA_12', 'SMA_26'], axis=1, inplace=True)

# Handle any NaN values
data2 = data2.dropna()

# Normalization of the test dataset using the scaler fitted on the training data
data2_scaled = scaler.transform(data2)
data2_scaled = pd.DataFrame(data2_scaled, columns=data2.columns, index=data2.index)

# Separating the target variable ('close_change') from features for the test dataset
X_test_new = data2_scaled.drop('close_change', axis=1)
y_test_new = data2_scaled['close_change']

# Predicting on the new test dataset
predictions_new = model.predict(X_test_new)

# Evaluating the model on the new test dataset
mae_new = mean_absolute_error(y_test_new, predictions_new)
print(f'Mean Absolute Error on new data: {mae_new}')


Mean Absolute Error: 0.008984071784944196
Mean Absolute Error on new data: 0.006935472577083764


In [2]:
from sklearn.metrics import mean_squared_error, r2_score

# Function to calculate Mean Absolute Percentage Error
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Evaluations for the initial test set
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictions)
mape = mean_absolute_percentage_error(y_test, predictions)

print(f'Initial Test Set Evaluation:')
print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')
print(f'R-squared: {r2}')
print(f'Mean Absolute Percentage Error: {mape}%')

# Evaluations for the new test dataset
mae_new = mean_absolute_error(y_test_new, predictions_new)
mse_new = mean_squared_error(y_test_new, predictions_new)
rmse_new = np.sqrt(mse_new)
r2_new = r2_score(y_test_new, predictions_new)
mape_new = mean_absolute_percentage_error(y_test_new, predictions_new)

print(f'\nNew Test Dataset Evaluation:')
print(f'Mean Absolute Error: {mae_new}')
print(f'Mean Squared Error: {mse_new}')
print(f'Root Mean Squared Error: {rmse_new}')
print(f'R-squared: {r2_new}')
print(f'Mean Absolute Percentage Error: {mape_new}%')


Initial Test Set Evaluation:
Mean Absolute Error: 0.008984071784944196
Mean Squared Error: 0.0005130051859857719
Root Mean Squared Error: 0.022649617788955554
R-squared: 0.5452234164670671
Mean Absolute Percentage Error: inf%

New Test Dataset Evaluation:
Mean Absolute Error: 0.006935472577083764
Mean Squared Error: 0.00037227259889093087
Root Mean Squared Error: 0.019294367024883994
R-squared: 0.5608974883051441
Mean Absolute Percentage Error: inf%


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
