In [146]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle

In [147]:
df = pd.read_excel('ksrdata.xlsx')

In [148]:
# Feature selection (replace with your actual feature columns)
features = ['PRESENT_STORAGE_TMC', 'RESERVOIR_LEVEL_FT', 'Seasons1']
target_inflow = 'INFLOW_CUSECS'   # Replace with your actual inflow target column
target_outflow = 'OUTFLOW_CUSECS' # Replace with your actual outflow target column

In [149]:
# Split the dataset into input features (X) and output target (y)
X = df[features]

In [150]:
# For inflow prediction
y_inflow = df[target_inflow]

In [151]:
# For outflow prediction
y_outflow = df[target_outflow]

In [152]:
# Split the data into training and test sets for inflow
X_train_inflow, X_test_inflow, y_train_inflow, y_test_inflow = train_test_split(X, y_inflow, test_size=0.2, random_state=42)

# Split the data into training and test sets for outflow
X_train_outflow, X_test_outflow, y_train_outflow, y_test_outflow = train_test_split(X, y_outflow, test_size=0.2, random_state=42)

In [153]:
# Initialize the Gradient Boosting Regressor model
gbr_inflow = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gbr_outflow = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

In [154]:
# Train the model for inflow prediction
gbr_inflow.fit(X_train_inflow, y_train_inflow)

In [155]:
# Train the model for outflow prediction
gbr_outflow.fit(X_train_outflow, y_train_outflow)

In [156]:
# Make predictions on the test set for inflow
y_pred_inflow = gbr_inflow.predict(X_test_inflow)

# Make predictions on the test set for outflow
y_pred_outflow = gbr_outflow.predict(X_test_outflow)

In [157]:
# Calculate the mean of the predicted inflow and outflow rates
final_inflow = np.mean(y_pred_inflow)
final_outflow = np.mean(y_pred_outflow)

In [158]:
# Print the final inflow and outflow rates
print(f"Final Predicted Inflow Rate (in cusecs): {final_inflow:.2f}")
print(f"Final Predicted Outflow Rate (in cusecs): {final_outflow:.2f}")

Final Predicted Inflow Rate (in cusecs): 5073.65
Final Predicted Outflow Rate (in cusecs): 4828.34


In [159]:
# Evaluate the model performance for inflow
mse_inflow = mean_squared_error(y_test_inflow, y_pred_inflow)
mae_inflow = mean_absolute_error(y_test_inflow, y_pred_inflow)
r2_inflow = r2_score(y_test_inflow, y_pred_inflow)

# Evaluate the model performance for outflow
mse_outflow = mean_squared_error(y_test_outflow, y_pred_outflow)
mae_outflow = mean_absolute_error(y_test_outflow, y_pred_outflow)
r2_outflow = r2_score(y_test_outflow, y_pred_outflow)

In [160]:
# Print evaluation metrics for inflow
print(f"Inflow Prediction - MSE: {mse_inflow}, MAE: {mae_inflow}, R²: {r2_inflow}")

# Print evaluation metrics for outflow
print(f"Outflow Prediction - MSE: {mse_outflow}, MAE: {mae_outflow}, R²: {r2_outflow}")

Inflow Prediction - MSE: 51901052.12018839, MAE: 3135.0919560400457, R²: 0.25524293389615327
Outflow Prediction - MSE: 50918868.54388314, MAE: 3074.662104932885, R²: 0.24597164691396234


In [161]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np


# Define a threshold to convert continuous predictions to binary classes
#default threshold is 0.5
threshold = 0.5

# Convert predictions to binary classes
y_test_inflow_class = (y_test_inflow > threshold).astype(int)
y_test_outflow_class = (y_test_outflow > threshold).astype(int)
pred_inflow_class = (y_pred_inflow > threshold).astype(int)
pred_outflow_class = (y_pred_outflow > threshold).astype(int)


In [162]:
# For inflow
accuracy_inflow = accuracy_score(y_test_inflow_class, pred_inflow_class)
precision_inflow = precision_score(y_test_inflow_class, pred_inflow_class)
recall_inflow = recall_score(y_test_inflow_class, pred_inflow_class)
f1_inflow = f1_score(y_test_inflow_class, pred_inflow_class)
confusion_matrix_inflow = confusion_matrix(y_test_inflow_class, pred_inflow_class)

# Convert R² score to percentage
accuracy_inflow = r2_inflow * 100

# For outflow
accuracy_outflow = accuracy_score(y_test_outflow_class, pred_outflow_class)
precision_outflow = precision_score(y_test_outflow_class, pred_outflow_class)
recall_outflow = recall_score(y_test_outflow_class, pred_outflow_class)
f1_outflow = f1_score(y_test_outflow_class, pred_outflow_class)
confusion_matrix_outflow = confusion_matrix(y_test_outflow_class, pred_outflow_class)

# Convert R² score to percentage
accuracy_outflow = r2_outflow * 100


In [163]:
# Print metrics for inflow
print(f"Inflow - Accuracy: {accuracy_inflow:.4f}")
print(f"Inflow - Precision: {precision_inflow:.4f}")
print(f"Inflow - Recall: {recall_inflow:.4f}")
print(f"Inflow - F1 Score: {f1_inflow:.4f}")
print("Inflow - Confusion Matrix:")
print(confusion_matrix_inflow)

# Print metrics for outflow
print(f"Outflow - Accuracy: {accuracy_outflow:.4f}")
print(f"Outflow - Precision: {precision_outflow:.4f}")
print(f"Outflow - Recall: {recall_outflow:.4f}")
print(f"Outflow - F1 Score: {f1_outflow:.4f}")
print("Outflow - Confusion Matrix:")
print(confusion_matrix_outflow)

Inflow - Accuracy: 25.5243
Inflow - Precision: 1.0000
Inflow - Recall: 0.9985
Inflow - F1 Score: 0.9992
Inflow - Confusion Matrix:
[[  0   0]
 [  1 662]]
Outflow - Accuracy: 24.5972
Outflow - Precision: 0.9970
Outflow - Recall: 1.0000
Outflow - F1 Score: 0.9985
Outflow - Confusion Matrix:
[[  0   2]
 [  0 661]]


In [164]:
# Print the accuracy in percentage
print(f"Inflow Prediction Accuracy: {accuracy_inflow:.2f}%")
print(f"Outflow Prediction Accuracy: {accuracy_outflow:.2f}%")

Inflow Prediction Accuracy: 25.52%
Outflow Prediction Accuracy: 24.60%


In [165]:
# Print evaluation metrics for inflow
print(f"Inflow Prediction - MSE: {mse_inflow}, MAE: {mae_inflow}, R²: {r2_inflow}")

# Print evaluation metrics for outflow
print(f"Outflow Prediction - MSE: {mse_outflow}, MAE: {mae_outflow}, R²: {r2_outflow}")

Inflow Prediction - MSE: 51901052.12018839, MAE: 3135.0919560400457, R²: 0.25524293389615327
Outflow Prediction - MSE: 50918868.54388314, MAE: 3074.662104932885, R²: 0.24597164691396234


In [166]:
# Save the trained models as .pkl files for later use
with open('gbr_inflow_model.pkl', 'wb') as inflow_model_file1:
    pickle.dump(gbr_inflow, inflow_model_file1)

In [167]:
print(type(gbr_inflow)) 

<class 'sklearn.ensemble._gb.GradientBoostingRegressor'>


In [168]:
with open('gbr_outflow_model.pkl', 'wb') as outflow_model_file2:
    pickle.dump(gbr_outflow, outflow_model_file2)

In [169]:
print(type(gbr_outflow)) 

<class 'sklearn.ensemble._gb.GradientBoostingRegressor'>


In [170]:
from sklearn.svm import SVR
# Split the data into training and test sets for inflow
X_train_inflow, X_test_inflow, y_train_inflow, y_test_inflow = train_test_split(X, y_inflow, test_size=0.2, random_state=42)

# Split the data into training and test sets for outflow
X_train_outflow, X_test_outflow, y_train_outflow, y_test_outflow = train_test_split(X, y_outflow, test_size=0.2, random_state=42)

In [171]:
# Initialize the SVR model
svr_inflow = SVR(kernel='rbf', C=100, gamma='auto')
svr_outflow = SVR(kernel='rbf', C=100, gamma='auto')

In [172]:
# Train the model for inflow prediction
svr_inflow.fit(X_train_inflow, y_train_inflow)

In [173]:
# Train the model for outflow prediction
svr_outflow.fit(X_train_outflow, y_train_outflow)

In [174]:
# Make predictions on the test set for inflow
y_pred_inflow = svr_inflow.predict(X_test_inflow)

# Make predictions on the test set for outflow
y_pred_outflow = svr_outflow.predict(X_test_outflow)

In [175]:
# Evaluate the model performance for inflow
mse_inflow = mean_squared_error(y_test_inflow, y_pred_inflow)
mae_inflow = mean_absolute_error(y_test_inflow, y_pred_inflow)
r2_inflow = r2_score(y_test_inflow, y_pred_inflow)

# Evaluate the model performance for outflow
mse_outflow = mean_squared_error(y_test_outflow, y_pred_outflow)
mae_outflow = mean_absolute_error(y_test_outflow, y_pred_outflow)
r2_outflow = r2_score(y_test_outflow, y_pred_outflow)

In [176]:
# Print evaluation metrics for inflow
print(f"Inflow Prediction - MSE: {mse_inflow}, MAE: {mae_inflow}, R²: {r2_inflow}")

# Print evaluation metrics for outflow
print(f"Outflow Prediction - MSE: {mse_outflow}, MAE: {mae_outflow}, R²: {r2_outflow}")

Inflow Prediction - MSE: 64599723.56812435, MAE: 3321.642483046926, R²: 0.07302263383208718
Outflow Prediction - MSE: 61068128.52995364, MAE: 2854.3063323786223, R²: 0.09567707024356031


In [177]:
# For inflow
accuracy_inflow = accuracy_score(y_test_inflow_class, pred_inflow_class)
precision_inflow = precision_score(y_test_inflow_class, pred_inflow_class)
recall_inflow = recall_score(y_test_inflow_class, pred_inflow_class)
f1_inflow = f1_score(y_test_inflow_class, pred_inflow_class)
confusion_matrix_inflow = confusion_matrix(y_test_inflow_class, pred_inflow_class)

# Convert R² score to percentage
accuracy_inflow = r2_inflow * 100

# For outflow
accuracy_outflow = accuracy_score(y_test_outflow_class, pred_outflow_class)
precision_outflow = precision_score(y_test_outflow_class, pred_outflow_class)
recall_outflow = recall_score(y_test_outflow_class, pred_outflow_class)
f1_outflow = f1_score(y_test_outflow_class, pred_outflow_class)
confusion_matrix_outflow = confusion_matrix(y_test_outflow_class, pred_outflow_class)

# Convert R² score to percentage
accuracy_outflow = r2_outflow * 100


In [62]:
# Print metrics for inflow
print(f"Inflow - Accuracy: {accuracy_inflow:.4f}")
print(f"Inflow - Precision: {precision_inflow:.4f}")
print(f"Inflow - Recall: {recall_inflow:.4f}")
print(f"Inflow - F1 Score: {f1_inflow:.4f}")
print("Inflow - Confusion Matrix:")
print(confusion_matrix_inflow)

# Print metrics for outflow
print(f"Outflow - Accuracy: {accuracy_outflow:.4f}")
print(f"Outflow - Precision: {precision_outflow:.4f}")
print(f"Outflow - Recall: {recall_outflow:.4f}")
print(f"Outflow - F1 Score: {f1_outflow:.4f}")
print("Outflow - Confusion Matrix:")
print(confusion_matrix_outflow)



Inflow - Accuracy: 7.3023
Inflow - Precision: 1.0000
Inflow - Recall: 1.0000
Inflow - F1 Score: 1.0000
Inflow - Confusion Matrix:
[[663]]
Outflow - Accuracy: 9.5677
Outflow - Precision: 0.9970
Outflow - Recall: 1.0000
Outflow - F1 Score: 0.9985
Outflow - Confusion Matrix:
[[  0   2]
 [  0 661]]
Inflow Prediction Accuracy: 7.30%
Outflow Prediction Accuracy: 9.57%


In [178]:
# Print the accuracy in percentage
print(f"Inflow Prediction Accuracy: {accuracy_inflow:.2f}%")
print(f"Outflow Prediction Accuracy: {accuracy_outflow:.2f}%")

Inflow Prediction Accuracy: 7.30%
Outflow Prediction Accuracy: 9.57%


In [179]:
# Save the trained models as .pkl files for later use
with open('svr_inflow_model.pkl', 'wb') as inflow_model_file3:
    pickle.dump(svr_inflow, inflow_model_file3)

In [180]:
print(type(svr_outflow)) 

<class 'sklearn.svm._classes.SVR'>


In [181]:
with open('svr_outflow_model.pkl', 'wb') as outflow_model_file4:
    pickle.dump(svr_outflow, outflow_model_file4)

In [182]:
print(type(svr_inflow)) 

<class 'sklearn.svm._classes.SVR'>


In [183]:
from sklearn.ensemble import RandomForestRegressor

In [184]:
rf_inflow = RandomForestRegressor(n_estimators=100, random_state=42)
rf_inflow.fit(X_train_inflow, y_train_inflow)

In [185]:
rf_outflow = RandomForestRegressor(n_estimators=100, random_state=42)
rf_outflow.fit(X_train_outflow, y_train_outflow)

In [186]:
# Make predictions on the test set for inflow
y_pred_inflow = rf_inflow.predict(X_test_inflow)

# Make predictions on the test set for outflow
y_pred_outflow = rf_outflow.predict(X_test_outflow)

In [187]:
# Evaluate the model performance for inflow
mse_inflow = mean_squared_error(y_test_inflow, y_pred_inflow)
mae_inflow = mean_absolute_error(y_test_inflow, y_pred_inflow)
r2_inflow = r2_score(y_test_inflow, y_pred_inflow)

# Evaluate the model performance for outflow
mse_outflow = mean_squared_error(y_test_outflow, y_pred_outflow)
mae_outflow = mean_absolute_error(y_test_outflow, y_pred_outflow)
r2_outflow = r2_score(y_test_outflow, y_pred_outflow)

In [188]:
# Print evaluation metrics for inflow
print(f"Inflow Prediction - MSE: {mse_inflow}, MAE: {mae_inflow}, R²: {r2_inflow}")

# Print evaluation metrics for outflow
print(f"Outflow Prediction - MSE: {mse_outflow}, MAE: {mae_outflow}, R²: {r2_outflow}")

Inflow Prediction - MSE: 59325053.821309775, MAE: 3341.37424013019, R²: 0.14871180399009054
Outflow Prediction - MSE: 68571246.69068722, MAE: 3117.2470524495166, R²: -0.015432308097634628


In [189]:
# For inflow
accuracy_inflow = accuracy_score(y_test_inflow_class, pred_inflow_class)
precision_inflow = precision_score(y_test_inflow_class, pred_inflow_class)
recall_inflow = recall_score(y_test_inflow_class, pred_inflow_class)
f1_inflow = f1_score(y_test_inflow_class, pred_inflow_class)
confusion_matrix_inflow = confusion_matrix(y_test_inflow_class, pred_inflow_class)

# Convert R² score to percentage
accuracy_inflow = r2_inflow * 100

# For outflow
accuracy_outflow = accuracy_score(y_test_outflow_class, pred_outflow_class)
precision_outflow = precision_score(y_test_outflow_class, pred_outflow_class)
recall_outflow = recall_score(y_test_outflow_class, pred_outflow_class)
f1_outflow = f1_score(y_test_outflow_class, pred_outflow_class)
confusion_matrix_outflow = confusion_matrix(y_test_outflow_class, pred_outflow_class)

# Convert R² score to percentage
accuracy_outflow = r2_outflow * 100


In [190]:
# Print metrics for inflow
print(f"Inflow - Accuracy: {accuracy_inflow:.4f}")
print(f"Inflow - Precision: {precision_inflow:.4f}")
print(f"Inflow - Recall: {recall_inflow:.4f}")
print(f"Inflow - F1 Score: {f1_inflow:.4f}")
print("Inflow - Confusion Matrix:")
print(confusion_matrix_inflow)

# Print metrics for outflow
print(f"Outflow - Accuracy: {accuracy_outflow:.4f}")
print(f"Outflow - Precision: {precision_outflow:.4f}")
print(f"Outflow - Recall: {recall_outflow:.4f}")
print(f"Outflow - F1 Score: {f1_outflow:.4f}")
print("Outflow - Confusion Matrix:")
print(confusion_matrix_outflow)



Inflow - Accuracy: 14.8712
Inflow - Precision: 1.0000
Inflow - Recall: 0.9985
Inflow - F1 Score: 0.9992
Inflow - Confusion Matrix:
[[  0   0]
 [  1 662]]
Outflow - Accuracy: -1.5432
Outflow - Precision: 0.9970
Outflow - Recall: 1.0000
Outflow - F1 Score: 0.9985
Outflow - Confusion Matrix:
[[  0   2]
 [  0 661]]


In [191]:
# Print the accuracy in percentage
print(f"Inflow Prediction Accuracy: {accuracy_inflow:.2f}%")
print(f"Outflow Prediction Accuracy: {accuracy_outflow:.2f}%")

Inflow Prediction Accuracy: 14.87%
Outflow Prediction Accuracy: -1.54%


In [192]:
# Print evaluation metrics for inflow
print(f"Inflow Prediction - MSE: {mse_inflow}, MAE: {mae_inflow}, R²: {r2_inflow}")

# Print evaluation metrics for outflow
print(f"Outflow Prediction - MSE: {mse_outflow}, MAE: {mae_outflow}, R²: {r2_outflow}")

Inflow Prediction - MSE: 59325053.821309775, MAE: 3341.37424013019, R²: 0.14871180399009054
Outflow Prediction - MSE: 68571246.69068722, MAE: 3117.2470524495166, R²: -0.015432308097634628


In [193]:
# Save the trained models as .pkl files for later use
with open('rf_inflow_model.pkl', 'wb') as inflow_model_file5:
    pickle.dump(rf_inflow, inflow_model_file5)

In [194]:
print(type(rf_inflow)) 

<class 'sklearn.ensemble._forest.RandomForestRegressor'>


In [195]:
# Save the trained models as .pkl files for later use
with open('rf_outflow_model.pkl', 'wb') as inflow_model_file6:
    pickle.dump(rf_inflow, inflow_model_file6)

In [196]:
print(type(rf_outflow)) 

<class 'sklearn.ensemble._forest.RandomForestRegressor'>
