In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error, explained_variance_score
from tqdm import tqdm

# Load dataset
data = pd.read_csv('/kaggle/input/futurepred/updatedfuturepredictions.csv')

# Convert 'DateTime' to datetime format if it's not already
data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)  # Set timestamp as index

# Define time intervals based on the index (hour of the day)
conditions = [
    (data.index.hour < 6),
    (data.index.hour >= 6) & (data.index.hour < 12),
    (data.index.hour >= 12) & (data.index.hour < 17),
    (data.index.hour >= 17)
]
timeframes = ['Night', 'Morning', 'Afternoon', 'Evening']

# Create a new column for timeframes
data['Timeframe'] = np.select(conditions, timeframes)

# Split data into features and target
X = data.drop(columns=['RZT', 'Timeframe'])  # Drop the target and non-relevant features
y = data['RZT']

# Drop non-numeric columns (like timestamps) from features
X = X.select_dtypes(include=[np.number])

# Proceed with train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ridge Regression model
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)

# Predict and evaluate Ridge Regression
y_pred_ridge = ridge.predict(X_test)

# Calculate and display metrics
print("Ridge Regression Metrics:")
print("RMSE:", mean_squared_error(y_test, y_pred_ridge, squared=False))
print("MAE:", mean_absolute_error(y_test, y_pred_ridge))
print("R² Score:", r2_score(y_test, y_pred_ridge))
print("MAPE:", mean_absolute_percentage_error(y_test, y_pred_ridge))
print("Explained Variance Score:", explained_variance_score(y_test, y_pred_ridge))

# Generate optimized Ridge predictions for the entire dataset
data['RZT_Optimized_Ridge'] = [
    ridge.predict(X.iloc[[i]])[0] for i in tqdm(range(len(X)), desc="Ridge Prediction")
]

# Save predictions to Excel
data.to_excel('RZT_Optimized_Predictions.xlsx', index=False)
print("Optimized Ridge predictions saved to 'RZT_Optimized_Predictions.xlsx'.")

# Calculate min and max RZT for each timeframe
timeframe_stats = data.groupby('Timeframe')['RZT_Optimized_Ridge'].agg(['min', 'max']).reset_index()
print("Optimized Ridge Predictions Min and Max for each Timeframe:")
print(timeframe_stats)


Ridge Regression Metrics:
RMSE: 0.00010859386576204629
MAE: 8.556138843053161e-05
R² Score: 0.9999999983732062
MAPE: 4.385661556390377e-06
Explained Variance Score: 0.9999999983734356


Ridge Prediction: 100%|██████████| 7200/7200 [00:08<00:00, 897.05it/s]


Optimized Ridge predictions saved to 'RZT_Optimized_Predictions.xlsx'.
Optimized Ridge Predictions Min and Max for each Timeframe:
   Timeframe        min        max
0  Afternoon  17.374215  31.460924
1    Evening  17.368994  31.774783
2    Morning  17.333801  34.608553
3      Night  17.338181  32.897261


In [4]:
from sklearn.metrics import (mean_squared_error, mean_absolute_error, r2_score,
                             mean_absolute_percentage_error, explained_variance_score,
                             median_absolute_error, max_error, mean_squared_log_error,
                             mean_poisson_deviance, mean_tweedie_deviance)

# Calculate and display multiple metrics
print("Ridge Regression Metrics:")
print("1. RMSE:", mean_squared_error(y_test, y_pred_ridge, squared=False))
print("2. MAE:", mean_absolute_error(y_test, y_pred_ridge))
print("3. Median Absolute Error:", median_absolute_error(y_test, y_pred_ridge))
print("4. R² Score:", r2_score(y_test, y_pred_ridge))
print("5. MAPE:", mean_absolute_percentage_error(y_test, y_pred_ridge))
print("6. Explained Variance Score:", explained_variance_score(y_test, y_pred_ridge))
print("7. Max Error:", max_error(y_test, y_pred_ridge))
print("8. Mean Squared Log Error:", mean_squared_log_error(y_test, y_pred_ridge))
print("9. Mean Poisson Deviance:", mean_poisson_deviance(y_test, y_pred_ridge))
print("10. Mean Tweedie Deviance:", mean_tweedie_deviance(y_test, y_pred_ridge))


Ridge Regression Metrics:
1. RMSE: 0.00010859386576204629
2. MAE: 8.556138843053161e-05
3. Median Absolute Error: 7.251602758628906e-05
4. R² Score: 0.9999999983732062
5. MAPE: 4.385661556390377e-06
6. Explained Variance Score: 0.9999999983734356
7. Max Error: 0.00036322018203804873
8. Mean Squared Log Error: 2.650877633197765e-11
9. Mean Poisson Deviance: 5.811804568464948e-10
10. Mean Tweedie Deviance: 1.179262768114533e-08
