In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# ---------------------------------------------------------------------------
# Load data, build leak-free train/validation/test splits, and train a dense
# feed-forward regressor for the 'Inflation' column.
# ---------------------------------------------------------------------------
combined_data = pd.read_csv('us_market_data_hourly.csv')
columns_to_drop = ['observation_date','unemployment', 'Yield Spread', 'US UK', 'Sentiment']
filtered_data = combined_data.drop(columns=columns_to_drop)

# Name of the column being predicted.
target = 'Inflation'

# The target must NOT be part of the feature matrix; otherwise every model
# simply learns to copy its own label and the reported metrics are meaningless.
y = filtered_data[target]
X = filtered_data.drop(columns=[target])

# Split first (70% train / 30% hold-out) so that the scaler below never sees
# validation or test rows.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform to
# everything else.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_temp = scaler.transform(X_temp_raw)
# Kept for compatibility with any code that expects the fully scaled matrix;
# it is scaled with the training-set statistics.
X_scaled = scaler.transform(X)

# Split the hold-out into validation (33%) and test (67%).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.67, random_state=42
)

# Seed TensorFlow so weight initialisation and dropout are repeatable.
tf.random.set_seed(42)

model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Stop when validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)

# predict() returns shape (n, 1); flatten to align with y_test.
y_pred = model.predict(X_test).flatten()
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R²): {r2}")

# Predicted-vs-actual scatter with the y = x reference line.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, label='Predicted vs Actual')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linestyle='--', label='Ideal Line')
plt.title('Predicted vs Actual values')
plt.xlabel('Actual values')
plt.ylabel('Predicted values')
plt.legend()
plt.show()


ModuleNotFoundError: No module named 'numpy'

In [None]:
# Support Vector Regression (RBF kernel): fit on the training split, report
# validation and test metrics, then plot actual vs predicted test values.
svr = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
svr.fit(X_train, y_train)

# Predict both evaluation splits up front.
y_val_pred = svr.predict(X_val)
y_test_pred = svr.predict(X_test)

# Validation-split scores.
val_mse = mean_squared_error(y_val, y_val_pred)
val_rmse = np.sqrt(val_mse)
val_mae = mean_absolute_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)

print(
    "Validation Metrics:",
    f"Mean Absolute Error (MAE): {val_mae}",
    f"Mean Squared Error (MSE): {val_mse}",
    f"Root Mean Squared Error (RMSE): {val_rmse}",
    f"R-squared (R²): {val_r2}",
    sep="\n",
)

# Test-split scores.
test_mse = mean_squared_error(y_test, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(
    "\nTest Metrics:",
    f"Mean Absolute Error (MAE): {test_mae}",
    f"Mean Squared Error (MSE): {test_mse}",
    f"Root Mean Squared Error (RMSE): {test_rmse}",
    f"R-squared (R²): {test_r2}",
    sep="\n",
)

# Actual and predicted values plotted against their position in the test split.
svr_fig, svr_ax = plt.subplots(figsize=(10, 6))
svr_ax.scatter(np.arange(len(y_test)), y_test,
               color='blue', alpha=0.7, label='Actual Values', s=50)
svr_ax.scatter(np.arange(len(y_test_pred)), y_test_pred,
               color='orange', alpha=0.7, label='Predicted Values', s=50)
svr_ax.set_title("SVM Predicted vs Actual Values", fontsize=14)
svr_ax.set_xlabel("Sample Index", fontsize=12)
svr_ax.set_ylabel("Inflation Rate", fontsize=12)
svr_ax.legend()
plt.show()
 


In [None]:
# Random Forest regressor: fit on the training split, report validation and
# test metrics, then plot actual vs predicted test values.
# NOTE: this rebinds `model`, which the first cell used for the Keras network.
model = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=10)
model.fit(X_train, y_train)

y_val_pred = model.predict(X_val)

val_mae = mean_absolute_error(y_val, y_val_pred)
val_mse = mean_squared_error(y_val, y_val_pred)
val_rmse = np.sqrt(val_mse)
val_r2 = r2_score(y_val, y_val_pred)

print("Validation Metrics:")
print(f"Mean Absolute Error (MAE): {val_mae}")
print(f"Mean Squared Error (MSE): {val_mse}")
print(f"Root Mean Squared Error (RMSE): {val_rmse}")
print(f"R-squared (R²): {val_r2}")

y_test_pred = model.predict(X_test)

# Report test metrics as the SVR and XGBoost cells do, so the models can be
# compared on the same numbers.
test_mae = mean_absolute_error(y_test, y_test_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_r2 = r2_score(y_test, y_test_pred)

print("\nTest Metrics:")
print(f"Mean Absolute Error (MAE): {test_mae}")
print(f"Mean Squared Error (MSE): {test_mse}")
print(f"Root Mean Squared Error (RMSE): {test_rmse}")
print(f"R-squared (R²): {test_r2}")

# Actual and predicted values plotted against their position in the test split.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=list(range(len(y_test))),
    y=y_test,
    mode='markers',
    marker=dict(color='blue', size=8, opacity=0.7),
    name='Actual Values'
))
fig.add_trace(go.Scatter(
    x=list(range(len(y_test_pred))),
    y=y_test_pred,
    mode='markers',
    marker=dict(color='orange', size=8, opacity=0.7),
    name='Predicted Values'
))
fig.update_layout(
    title="Random Forest Predicted vs Actual Values",
    xaxis_title="Sample Index",
    yaxis_title="Inflation Rate",
    legend_title="Legend",
    template="plotly_white"
)
fig.show()

In [None]:
# XGBoost regressor trained on features that exclude the target column.
# `target` was previously undefined (NameError), and the rebuilt X / y were
# never used for fitting; both are fixed here.
target = 'Inflation'
if target not in filtered_data.columns:
    # Fail loudly instead of silently continuing with a stale X / y.
    raise KeyError(f"Target column '{target}' not found in filtered_data")

# Numeric feature columns only, with the target removed.
X = filtered_data.drop(columns=[target]).select_dtypes(include=['number']).copy()
y = filtered_data[target]

# Split the leak-free features with the same proportions and seed as the
# first cell's split. Cell-specific names leave the shared X_train / X_val /
# X_test arrays untouched for the other cells.
X_train_xgb, X_temp_xgb, y_train_xgb, y_temp_xgb = train_test_split(
    X, y, test_size=0.30, random_state=42
)
X_val_xgb, X_test_xgb, y_val_xgb, y_test_xgb = train_test_split(
    X_temp_xgb, y_temp_xgb, test_size=0.67, random_state=42
)

xgb_model = XGBRegressor(
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    objective='reg:squarederror',
    random_state=42
)
xgb_model.fit(X_train_xgb, y_train_xgb)

y_val_pred = xgb_model.predict(X_val_xgb)
val_mae = mean_absolute_error(y_val_xgb, y_val_pred)
val_mse = mean_squared_error(y_val_xgb, y_val_pred)
val_rmse = np.sqrt(val_mse)
val_r2 = r2_score(y_val_xgb, y_val_pred)

print("Validation Metrics:")
print(f"Mean Absolute Error (MAE): {val_mae}")
print(f"Mean Squared Error (MSE): {val_mse}")
print(f"Root Mean Squared Error (RMSE): {val_rmse}")
print(f"R-squared (R²): {val_r2}")

y_test_pred = xgb_model.predict(X_test_xgb)
test_mae = mean_absolute_error(y_test_xgb, y_test_pred)
test_mse = mean_squared_error(y_test_xgb, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_r2 = r2_score(y_test_xgb, y_test_pred)

print("\nTest Metrics:")
print(f"Mean Absolute Error (MAE): {test_mae}")
print(f"Mean Squared Error (MSE): {test_mse}")
print(f"Root Mean Squared Error (RMSE): {test_rmse}")
print(f"R-squared (R²): {test_r2}")

# Actual and predicted values plotted against their position in the test split.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=list(range(len(y_test_xgb))),
    y=y_test_xgb,
    mode='markers',
    marker=dict(color='green', size=8, opacity=0.7),
    name='Actual Values'
))
fig.add_trace(go.Scatter(
    x=list(range(len(y_test_pred))),
    y=y_test_pred,
    mode='markers',
    marker=dict(color='red', size=8, opacity=0.7),
    name='Predicted Values'
))
fig.update_layout(
    title="XGBoost Predicted vs Actual Values",
    xaxis_title="Sample Index",
    yaxis_title="Inflation Rate",
    legend_title="Legend",
    template="plotly_white"
)
fig.show()

In [None]:
# Stacked LSTM regressor.
# The split arrays from the first cell are 2-D (samples, features), but an
# LSTM layer needs 3-D input (samples, timesteps, features). Indexing
# X_train.shape[2] on a 2-D array raises IndexError, so a time axis of
# length 1 is added first.
def _as_sequences(features):
    """Return `features` as a 3-D array (samples, timesteps, features).

    A 2-D input gets a singleton time axis inserted; a 3-D input is returned
    unchanged.
    """
    arr = np.asarray(features)
    if arr.ndim == 3:
        return arr
    return arr[:, np.newaxis, :]


X_train_seq = _as_sequences(X_train)
X_val_seq = _as_sequences(X_val)
X_test_seq = _as_sequences(X_test)

# NOTE: this rebinds `model`, replacing whatever estimator an earlier cell
# stored under that name.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train_seq.shape[1], X_train_seq.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Stop when validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)
history = model.fit(
    X_train_seq, y_train,
    validation_data=(X_val_seq, y_val),
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)

# predict() returns shape (n, 1); flatten so predictions align 1-D with the
# targets in both the metrics and the plot.
y_val_pred = model.predict(X_val_seq).flatten()
y_test_pred = model.predict(X_test_seq).flatten()

mae_val = mean_absolute_error(y_val, y_val_pred)
mse_val = mean_squared_error(y_val, y_val_pred)
rmse_val = np.sqrt(mse_val)
r2_val = r2_score(y_val, y_val_pred)

print("Validation Metrics:")
print(f"Mean Absolute Error (MAE): {mae_val}")
print(f"Mean Squared Error (MSE): {mse_val}")
print(f"Root Mean Squared Error (RMSE): {rmse_val}")
print(f"R-squared (R²): {r2_val}")

mae_test = mean_absolute_error(y_test, y_test_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test, y_test_pred)

print("\nTest Metrics:")
print(f"Mean Absolute Error (MAE): {mae_test}")
print(f"Mean Squared Error (MSE): {mse_test}")
print(f"Root Mean Squared Error (RMSE): {rmse_test}")
print(f"R-squared (R²): {r2_test}")

# Actual and predicted values plotted against their position in the test split.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=list(range(len(y_test))),
    y=y_test,
    mode='markers',
    marker=dict(color='blue', size=8, opacity=0.7),
    name='Actual Values'
))
fig.add_trace(go.Scatter(
    x=list(range(len(y_test_pred))),
    y=y_test_pred,
    mode='markers',
    marker=dict(color='orange', size=8, opacity=0.7),
    name='Predicted Values'
))
fig.update_layout(
    title="LSTM Predicted vs Actual Values",
    xaxis_title="Sample Index",
    yaxis_title="Inflation Rate",
    legend_title="Legend",
    template="plotly_white"
)
fig.show()
