In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/futureprediction/updatedfuturepredictions.csv


In [5]:
!pip install tqdm




In [7]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import (mean_squared_error, mean_absolute_error, r2_score,
                             mean_absolute_percentage_error, explained_variance_score,
                             median_absolute_error, max_error, mean_squared_log_error,
                             mean_poisson_deviance, mean_tweedie_deviance)
from tqdm import tqdm

# Load dataset
data = pd.read_csv('/kaggle/input/futureprediction/updatedfuturepredictions.csv')

# Assuming there's a timestamp column named 'DateTime'
data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)

# Define time intervals based on the index
conditions = [
    (data.index.hour < 6),
    (data.index.hour >= 6) & (data.index.hour < 12),
    (data.index.hour >= 12) & (data.index.hour < 17),
    (data.index.hour >= 17)
]
timeframes = ['Night', 'Morning', 'Afternoon', 'Evening']
data['Timeframe'] = np.select(conditions, timeframes)

# Split data into features and target
X = data.drop(columns=['RZT', 'Timeframe'])
y = data['RZT']
X = X.select_dtypes(include=[np.number])

# Proceed with train-test split and model training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Gradient Boosting Regression
gbr = GradientBoostingRegressor()
gbr.fit(X_train, y_train)

# Predict and evaluate Gradient Boosting Regression
y_pred_gbr = gbr.predict(X_test)

# Calculate and display multiple metrics
print("Gradient Boosting Regression Metrics:")
print("1. RMSE:", mean_squared_error(y_test, y_pred_gbr, squared=False))
print("2. MAE:", mean_absolute_error(y_test, y_pred_gbr))
print("3. Median Absolute Error:", median_absolute_error(y_test, y_pred_gbr))
print("4. R² Score:", r2_score(y_test, y_pred_gbr))
print("5. MAPE:", mean_absolute_percentage_error(y_test, y_pred_gbr))
print("6. Explained Variance Score:", explained_variance_score(y_test, y_pred_gbr))
print("7. Max Error:", max_error(y_test, y_pred_gbr))
print("8. Mean Squared Log Error:", mean_squared_log_error(y_test, y_pred_gbr))
print("9. Mean Poisson Deviance:", mean_poisson_deviance(y_test, y_pred_gbr))
print("10. Mean Tweedie Deviance:", mean_tweedie_deviance(y_test, y_pred_gbr))

# Generate optimized Gradient Boosting predictions for the entire dataset
data['RZT_Optimized_GBR'] = [gbr.predict(pd.DataFrame([row], columns=X.columns))[0] for row in tqdm(X.values, desc="Gradient Boosting Prediction")]

# Save to Excel
data.to_excel('RZT_Optimized_Predictions_GBR.xlsx', index=False)
print("Optimized Gradient Boosting predictions saved to 'RZT_Optimized_Predictions_GBR.xlsx'.")

# Calculate min and max for each timeframe
timeframe_stats_gbr = data.groupby('Timeframe')['RZT_Optimized_GBR'].agg(['min', 'max']).reset_index()
print("Optimized Gradient Boosting Predictions Min and Max for each Timeframe:")
print(timeframe_stats_gbr)


Gradient Boosting Regression Metrics:
1. RMSE: 0.025084978885867516
2. MAE: 0.01591421990713017
3. Median Absolute Error: 0.00900909156517038
4. R² Score: 0.9999131940684498
5. MAPE: 0.0007866914620903658
6. Explained Variance Score: 0.9999131941736996
7. Max Error: 0.1822003873971667
8. Mean Squared Log Error: 1.1985623469394176e-06
9. Mean Poisson Deviance: 2.828961004332559e-05
10. Mean Tweedie Deviance: 0.0006292561657044191


Gradient Boosting Prediction: 100%|██████████| 7200/7200 [00:11<00:00, 624.37it/s]


Optimized Gradient Boosting predictions saved to 'RZT_Optimized_Predictions_GBR.xlsx'.
Optimized Gradient Boosting Predictions Min and Max for each Timeframe:
   Timeframe        min        max
0  Afternoon  17.381618  31.402678
1    Evening  17.373185  31.862174
2    Morning  17.341338  34.557887
3      Night  17.341338  32.875898
