In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset and discard every row that contains a missing value.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists.
dataset = pd.read_csv(
    r'D:\Programming\Datasets\Regression\Bitcoin\bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'
).dropna()

# Predictors and target. `y` is kept as a one-column DataFrame here and
# flattened to 1-D only after the split.
feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume_(BTC)', 'Volume_(Currency)']
X = dataset[feature_columns]
y = dataset[['Weighted_Price']]

# 80/20 shuffled hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: statistics are learned from the training rows only
# and then applied to both partitions (both become NumPy arrays).
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Flatten the single-column targets into 1-D arrays.
y_train, y_test = (np.ravel(target_part) for target_part in (y_train, y_test))

In [11]:
# Build three corrupted copies of the scaled training features in which
# 25%, 50% and 75% of the rows are REPLACED (not perturbed) by Gaussian noise.
# The targets in y_train are left untouched.

# Standard deviation of the zero-mean replacement noise.
NOISE_STD = 10

# Dedicated, seeded generator: without a seed the corrupted sets (and every
# metric computed from them) change on each kernel restart.
corruption_rng = np.random.default_rng(42)


def corrupt_rows(features, n_rows, rng, noise_std=NOISE_STD):
    """Return a noisy copy of `features` and the indices of the rows that were overwritten.

    Parameters
    ----------
    features : 2-D numpy.ndarray
        Source matrix; it is copied, never modified.
    n_rows : int
        Number of distinct rows to overwrite (0 <= n_rows <= len(features)).
    rng : numpy.random.Generator
        Source of randomness for both the row selection and the noise.
    noise_std : float, optional
        Standard deviation of the zero-mean normal noise.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The corrupted copy and the 1-D array of overwritten row indices.
    """
    corrupted = features.copy()
    # Sample without replacement so exactly `n_rows` distinct rows are hit.
    chosen = rng.choice(len(corrupted), n_rows, replace=False)
    # Every feature of a chosen row is overwritten with a fresh N(0, noise_std^2) draw.
    corrupted[chosen, :] = rng.normal(0, noise_std, size=(n_rows, corrupted.shape[1]))
    return corrupted, chosen


# Number of training rows to corrupt at each noise level.
corrupt_25 = int(len(X_train) * 25 / 100)
corrupt_50 = int(len(X_train) * 50 / 100)
corrupt_75 = int(len(X_train) * 75 / 100)

# Each level draws its own independent set of rows and its own noise.
X_corrupt_25, indices_corrupt_25 = corrupt_rows(X_train, corrupt_25, corruption_rng)
X_corrupt_50, indices_corrupt_50 = corrupt_rows(X_train, corrupt_50, corruption_rng)
X_corrupt_75, indices_corrupt_75 = corrupt_rows(X_train, corrupt_75, corruption_rng)

In [12]:
from sklearn.linear_model import Ridge

# Fit one default-parameter Ridge regressor per training variant: the clean
# features plus the 25% / 50% / 75% corrupted copies. All four share y_train.
ridge_model, ridge_model_25, ridge_model_50, ridge_model_75 = (
    Ridge().fit(training_features, y_train)
    for training_features in (X_train, X_corrupt_25, X_corrupt_50, X_corrupt_75)
)

# Every model is evaluated on the same, uncorrupted test features.
y_pred, y_pred_25, y_pred_50, y_pred_75 = (
    fitted_model.predict(X_test)
    for fitted_model in (ridge_model, ridge_model_25, ridge_model_50, ridge_model_75)
)

In [13]:
# Report MSE, RMSE, RMSPE and R-squared on the test set for each noise level.
# The *_ridge names are reassigned on every pass, so after the loop they hold
# the values of the last (75%) model.
for noise_label, predictions in (
    ('0%', y_pred),
    ('25%', y_pred_25),
    ('50%', y_pred_50),
    ('75%', y_pred_75),
):
    mse_ridge = mean_squared_error(y_test, predictions)
    r2_ridge = r2_score(y_test, predictions)
    rmse_ridge = np.sqrt(mse_ridge)
    # Root mean squared percentage error; divides by y_test, so it presumes
    # no zero targets -- TODO confirm for this dataset.
    relative_errors = (y_test - predictions) / y_test
    rmspe_ridge = np.sqrt(np.mean(relative_errors ** 2)) * 100

    print(f'Ridge Regression Results {noise_label} Noise:')
    print(f'Mean Squared Error: {mse_ridge}')
    print(f'Root Mean Squared Error: {rmse_ridge}')
    print(f'RMSPE: {rmspe_ridge}%')
    print(f'R-squared: {r2_ridge}')
    print("")

Ridge Regression Results 0% Noise:
Mean Squared Error: 17.305029089206332
Root Mean Squared Error: 4.1599313803482785
RMSPE: 0.1592559217885287%
R-squared: 0.9999997846515809

Ridge Regression Results 25% Noise:
Mean Squared Error: 63650761.11146449
Root Mean Squared Error: 7978.142710647917
RMSPE: 5703.313306902096%
R-squared: 0.20791287278819826

Ridge Regression Results 50% Noise:
Mean Squared Error: 74172613.8589662
Root Mean Squared Error: 8612.352399836307
RMSPE: 6163.765003348848%
R-squared: 0.07697611774893909

Ridge Regression Results 75% Noise:
Mean Squared Error: 78207461.25917667
Root Mean Squared Error: 8843.498247818941
RMSPE: 6331.230396815259%
R-squared: 0.02676539551776258

