In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Generate sample data (replace this with your actual data)
data = {
    'Month': pd.date_range(start='2022-01-01', periods=24, freq='M'),
    'Inflation': np.random.randn(24),
    'Unemployment': np.random.randn(24),
    'S&P500_Price': np.random.rand(24) * 1000,
    'Bitcoin_Price': np.random.rand(24) * 50000
}

# Create a DataFrame from the sample data
df = pd.DataFrame(data)

# Calculate monthly returns for S&P 500 and Bitcoin
df['S&P500_Return'] = df['S&P500_Price'].pct_change()
df['Bitcoin_Return'] = df['Bitcoin_Price'].pct_change()

# Prepare the independent variables (macroeconomic factors) and dependent variables (asset returns)
X = df[['Inflation', 'Unemployment']].values
y_sp500 = df['S&P500_Return'].values.reshape(-1, 1)
y_bitcoin = df['Bitcoin_Return'].values.reshape(-1, 1)

# Split the data into training and testing sets (e.g., first 12 months for training, next 12 for testing)
train_size = 12
X_train, X_test = X[:train_size], X[train_size:]
y_sp500_train, y_sp500_test = y_sp500[1:train_size+1], y_sp500[train_size+1:]
y_bitcoin_train, y_bitcoin_test = y_bitcoin[1:train_size+1], y_bitcoin[train_size+1:]

# Create and fit linear regression models for S&P 500 and Bitcoin returns using training data
model_sp500 = LinearRegression().fit(X_train, y_sp500_train)
model_bitcoin = LinearRegression().fit(X_train, y_bitcoin_train)

# Predict using the testing set
sp500_predictions = model_sp500.predict(X_test)
bitcoin_predictions = model_bitcoin.predict(X_test)


# Calculate evaluation metrics
mse_sp500 = mean_squared_error(y_sp500_test, sp500_predictions)
mse_bitcoin = mean_squared_error(y_bitcoin_test, bitcoin_predictions)
r2_sp500 = r2_score(y_sp500_test, sp500_predictions)
r2_bitcoin = r2_score(y_bitcoin_test, bitcoin_predictions)

# Visualize predictions vs actual for S&P 500 and Bitcoin returns
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.scatter(y_sp500_test, sp500_predictions)
plt.title('S&P 500 Predictions vs Actual')
plt.xlabel('Actual Returns')
plt.ylabel('Predicted Returns')

plt.subplot(1, 2, 2)
plt.scatter(y_bitcoin_test, bitcoin_predictions)
plt.title('Bitcoin Predictions vs Actual')
plt.xlabel('Actual Returns')
plt.ylabel('Predicted Returns')

plt.tight_layout()
plt.show()

# Print evaluation metrics
print("S&P 500 Testing Report:")
print(f"MSE: {mse_sp500}")
print(f"R-squared: {r2_sp500}\n")

print("Bitcoin Testing Report:")
print(f"MSE: {mse_bitcoin}")
print(f"R-squared: {r2_bitcoin}\n")


ValueError: Found input variables with inconsistent numbers of samples: [11, 12]