In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
#import catboost as cb
#import lightgbm as lgb
#import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor



In [None]:
# Read the pre-split feature (X) and target (y) tables, one directory per split.
X_train, y_train = pd.read_csv("train/X_train.csv"), pd.read_csv("train/y_train.csv")

X_val, y_val = pd.read_csv("validation/X_val.csv"), pd.read_csv("validation/y_val.csv")

X_test, y_test = pd.read_csv("test/X_test.csv"), pd.read_csv("test/y_test.csv")

In [None]:
# Display the training features as a table (nothing is plotted in this cell)
X_train


In [None]:
# Display the training targets as a table
y_train

### Regression

In [None]:
# Polynomial regression: optionally choose the polynomial degree on the
# validation split, then refit on train + validation and score on the test split.
degrees = [1, 2, 3, 4]
validation_errors = []
best_degree = 1

# The degree search is switched off by default, so best_degree stays at 1
# (plain linear regression). Set this to True to run the search; the number of
# polynomial features grows combinatorially with the degree, so the higher
# degrees can be slow and memory-hungry on wide feature tables.
RUN_DEGREE_SEARCH = False

if RUN_DEGREE_SEARCH:
    for degree in degrees:
        # Create polynomial features (fit on train only, reuse for validation)
        search_poly = PolynomialFeatures(degree=degree)
        X_train_poly = search_poly.fit_transform(X_train)
        X_val_poly = search_poly.transform(X_val)

        # Train a linear regression model on the polynomial features
        search_model = LinearRegression()
        search_model.fit(X_train_poly, y_train)

        # Predict on the validation set and calculate error
        y_val_pred = search_model.predict(X_val_poly)
        val_mse = mean_squared_error(y_val, y_val_pred)
        validation_errors.append(val_mse)

        print(f"Degree: {degree}, Validation MSE: {val_mse}")

    # Select the best polynomial degree based on the lowest validation error
    best_degree = degrees[int(np.argmin(validation_errors))]
    print(f"Best Polynomial Degree: {best_degree}")

# Combine training and validation sets for the final fit
X_train_val = pd.concat([X_train, X_val])
y_train_val = pd.concat([y_train, y_val])

# Create polynomial features with the best degree
poly = PolynomialFeatures(degree=best_degree)
X_train_val_poly = poly.fit_transform(X_train_val)
X_test_poly = poly.transform(X_test)

# Train the model on the combined training and validation set
final_model = LinearRegression()
final_model.fit(X_train_val_poly, y_train_val)

# Predict on the test set
y_test_pred = final_model.predict(X_test_poly)

# Calculate the Mean Squared Error and R^2 on the test set
test_mse = mean_squared_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)
print(f"Test Mean Squared Error: {test_mse}")
print(f"Test R^2: {test_r2}")

# Plot actual vs predicted values for the test set
plt.plot(range(len(y_test)), y_test, label="Actual Values")
plt.plot(range(len(y_test_pred)), y_test_pred, label="Predicted Values")
plt.legend()
plt.title(f"Polynomial Regression (degree {best_degree})")
plt.xlabel("Time Steps (Test Data)")
plt.ylabel("Value")
plt.show()


### Neural Network

In [None]:
# Neural network: multi-layer perceptron with a single hidden layer of 10 units
model = MLPRegressor(hidden_layer_sizes=(10,), max_iter=2000, random_state=0)

# Train the model on the training data.
# y_train is loaded as a DataFrame; flatten it to 1-D so scikit-learn does not
# emit a column-vector DataConversionWarning.
# NOTE(review): assumes y_train has exactly one target column — confirm.
model.fit(X_train, y_train.to_numpy().ravel())
# Predict on the test set
y_pred = model.predict(X_test)

# Calculate the Mean Squared Error and R^2 on the test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R^2: {r2}")

# Plot actual vs predicted values for the test set
plt.plot(range(len(y_test)), y_test, label="Actual Values")
plt.plot(range(len(y_pred)), y_pred, label="Predicted Values")
plt.legend()
plt.title("Neural Network (MLP)")
plt.xlabel("Time Steps (Test Data)")
plt.ylabel("Value")
plt.show()

### Decision Tree

In [None]:
# Decision tree: fit on the training split, then score and plot on the test split.
model_dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = model_dt.predict(X_test)

# Test-set metrics
mse_dt = mean_squared_error(y_test, y_pred_dt)
r2_dt = r2_score(y_test, y_pred_dt)
print(f"Mean Squared Error Decision Tree: {mse_dt}")
print(f"R^2 Decision Tree: {r2_dt}")

# Actual vs predicted, drawn through the explicit Axes interface
fig, ax = plt.subplots()
ax.plot(range(len(y_test)), y_test, label="Actual Values")
ax.plot(range(len(y_pred_dt)), y_pred_dt, label="Predicted Values")
ax.legend()
ax.set_title("Decision Tree Regressor")
ax.set_xlabel("Time Steps (Test Data)")
ax.set_ylabel("Value")
plt.show()

### Random Forest

In [None]:
# Random forest regressor: fit on the training split, evaluate on the test split
model_rf = RandomForestRegressor(random_state=42)
# y_train is loaded as a DataFrame; flatten it to 1-D so scikit-learn does not
# emit a column-vector DataConversionWarning.
# NOTE(review): assumes y_train has exactly one target column — confirm.
model_rf.fit(X_train, y_train.to_numpy().ravel())
y_pred_rf = model_rf.predict(X_test)

# Test-set metrics
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)
print(f"Mean Squared Error Random Forest: {mse_rf}")
print(f"R^2 Random Forest: {r2_rf}")

# Plot actual vs predicted values for the test set
plt.plot(range(len(y_test)), y_test, label="Actual Values")
plt.plot(range(len(y_pred_rf)), y_pred_rf, label="Predicted Values")
plt.legend()
plt.title("Random Forest")
plt.xlabel("Time Steps (Test Data)")
plt.ylabel("Value")
plt.show()

### SVM with kernel

In [None]:
# SVM regressor (Support Vector Machine) with the library's default settings
from sklearn.svm import SVR
model_svm = SVR()
# y_train is loaded as a DataFrame; flatten it to 1-D so scikit-learn does not
# emit a column-vector DataConversionWarning.
# NOTE(review): assumes y_train has exactly one target column — confirm.
model_svm.fit(X_train, y_train.to_numpy().ravel())
y_pred_svm = model_svm.predict(X_test)

# Test-set metrics
mse_svm = mean_squared_error(y_test, y_pred_svm)
r2_svm = r2_score(y_test, y_pred_svm)
print(f"Mean Squared Error SVM: {mse_svm}")
print(f"R^2 SVM: {r2_svm}")

# Plot actual vs predicted values for the test set
plt.plot(range(len(y_test)), y_test, label="Actual Values")
plt.plot(range(len(y_pred_svm)), y_pred_svm, label="Predicted Values")
plt.legend()
plt.title("Support Vector Machine")
plt.xlabel("Time Steps (Test Data)")
plt.ylabel("Value")
plt.show()

In [None]:
# Number of feature columns in the training table
print(X_train.shape[1])

In [None]:
# Generalized additive model (GAM): one spline term per feature column
from pygam import LinearGAM, s

n_features = X_train.shape[1]
# Build s(0) + s(1) + ... + s(n_features - 1) from the actual column count
# instead of a hand-written sum, so the model follows the width of X_train.
spline_terms = [s(i) for i in range(n_features)]
terms = sum(spline_terms[1:], spline_terms[0])
model_gam = LinearGAM(terms)
model_gam.fit(X_train, y_train)
y_pred_gam = model_gam.predict(X_test)

# Test-set metrics
mse_gam = mean_squared_error(y_test, y_pred_gam)
r2_gam = r2_score(y_test, y_pred_gam)

print(f"Mean Squared Error GAM: {mse_gam}")
print(f"R^2 GAM: {r2_gam}")

# Plot actual vs predicted values for the test set
plt.plot(range(len(y_test)), y_test, label="Actual Values")
plt.plot(range(len(y_pred_gam)), y_pred_gam, label="Predicted Values")
plt.legend()
plt.title("Generalized Additive Model")
plt.xlabel("Time Steps (Test Data)")
plt.ylabel("Value")
plt.show()
