<a href="https://colab.research.google.com/github/PRIYANKAM05/AI-driven-water-demand-forecasting/blob/main/water_demand_forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px


In [2]:
df = pd.read_csv('chennai_reservoir_levels.csv',
                parse_dates=['Date'], dayfirst=True)
df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM
0,2004-01-01,3.9,0.0,268.0,0.0
1,2004-01-02,3.9,0.0,268.0,0.0
2,2004-01-03,3.9,0.0,267.0,0.0
3,2004-01-04,3.9,0.0,267.0,0.0
4,2004-01-05,3.8,0.0,267.0,0.0


In [3]:
df.dtypes

Unnamed: 0,0
Date,datetime64[ns]
POONDI,float64
CHOLAVARAM,float64
REDHILLS,float64
CHEMBARAMBAKKAM,float64


In [4]:
df.isnull().sum()

Unnamed: 0,0
Date,0
POONDI,0
CHOLAVARAM,0
REDHILLS,0
CHEMBARAMBAKKAM,0


In [5]:
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=['Poondi Reserviour (in mcft)'])

fig.add_trace(go.Scatter(x=df.Date, y=df.POONDI, name='Poondi'), row=1, col=1)
fig.add_trace(go.Scatter(x=df.Date, y=df.REDHILLS, name='Redhills'), row=1, col=2)
fig.add_trace(go.Scatter(x=df.Date, y=df.CHEMBARAMBAKKAM, name='Chembarambakkam'), row=2, col=1)
fig.add_trace(go.Scatter(x=df.Date, y=df.CHOLAVARAM, name='Cholavaram'), row=2, col=2)

fig.update_layout(title_text=f"Water availability of Chennai's four major water resorviour ({df.Date.dt.year.min()} - {df.Date.dt.year.max()})")
fig.show()

In [6]:
df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM
0,2004-01-01,3.9,0.0,268.0,0.0
1,2004-01-02,3.9,0.0,268.0,0.0
2,2004-01-03,3.9,0.0,267.0,0.0
3,2004-01-04,3.9,0.0,267.0,0.0
4,2004-01-05,3.8,0.0,267.0,0.0


In [7]:
df_tidy = df.melt(id_vars=['Date'], var_name='Reservoir', value_name='Water_level')
df_tidy.head()

Unnamed: 0,Date,Reservoir,Water_level
0,2004-01-01,POONDI,3.9
1,2004-01-02,POONDI,3.9
2,2004-01-03,POONDI,3.9
3,2004-01-04,POONDI,3.9
4,2004-01-05,POONDI,3.8


In [10]:
!pip install plotly




In [13]:


fig = px.line(df_tidy,
       x='Date',
       y='Water_level',
       facet_row='Reservoir',
       facet_col_wrap=1,
       color='Reservoir',
        height=1200, width=1500,
       title=f"Water availability of Chennai's four major water resorviour ({df.Date.dt.year.min()} - {df.Date.dt.year.max()})"
       )
fig.update_yaxes(matches=None)
fig.show()

In [14]:
df['Total'] = df.drop(columns='Date').sum(axis=1)
df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM,Total
0,2004-01-01,3.9,0.0,268.0,0.0,271.9
1,2004-01-02,3.9,0.0,268.0,0.0,271.9
2,2004-01-03,3.9,0.0,267.0,0.0,270.9
3,2004-01-04,3.9,0.0,267.0,0.0,270.9
4,2004-01-05,3.8,0.0,267.0,0.0,270.8


In [15]:
px.line(df,
       x='Date',
       y='Total',
       title='Total water availability from all four reservoirs (in mcft)')

In [16]:
rain_df = pd.read_csv('chennai_reservoir_rainfall.csv',
                parse_dates=['Date'], dayfirst=True)
rain_df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM
0,2004-01-01,0.0,0.0,0.0,0.0
1,2004-01-02,0.0,0.0,0.0,0.0
2,2004-01-03,0.0,0.0,0.0,0.0
3,2004-01-04,0.0,0.0,0.0,0.0
4,2004-01-05,0.0,0.0,0.0,0.0


In [17]:
rain_df.dtypes

Unnamed: 0,0
Date,datetime64[ns]
POONDI,float64
CHOLAVARAM,float64
REDHILLS,float64
CHEMBARAMBAKKAM,float64


In [18]:
fig = px.line(rain_df.melt(id_vars='Date', var_name='Reservoir', value_name='Rainfall'),
       x='Date',
       y='Rainfall',
       facet_col='Reservoir',
       facet_col_wrap=2,
       color='Reservoir',
       title='Daily rainfall in Chennai'
       )
fig.update_yaxes(matches=None)
fig.show()

In [19]:
rain_df['YearMonth'] = pd.to_datetime(rain_df.Date.dt.year.astype(str) + rain_df.Date.dt.month.astype(str), format='%Y%m')
rain_df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM,YearMonth
0,2004-01-01,0.0,0.0,0.0,0.0,2004-01-01
1,2004-01-02,0.0,0.0,0.0,0.0,2004-01-01
2,2004-01-03,0.0,0.0,0.0,0.0,2004-01-01
3,2004-01-04,0.0,0.0,0.0,0.0,2004-01-01
4,2004-01-05,0.0,0.0,0.0,0.0,2004-01-01


In [20]:
rain_df.YearMonth.value_counts()

Unnamed: 0_level_0,count
YearMonth,Unnamed: 1_level_1
2004-01-01,31
2013-03-01,31
2013-07-01,31
2013-08-01,31
2013-10-01,31
...,...
2007-02-01,28
2014-02-01,28
2010-02-01,28
2018-02-01,28


In [21]:
rain_df['Total'] = rain_df.drop(columns=['Date', 'YearMonth']).sum(axis=1)
rain_df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM,YearMonth,Total
0,2004-01-01,0.0,0.0,0.0,0.0,2004-01-01,0.0
1,2004-01-02,0.0,0.0,0.0,0.0,2004-01-01,0.0
2,2004-01-03,0.0,0.0,0.0,0.0,2004-01-01,0.0
3,2004-01-04,0.0,0.0,0.0,0.0,2004-01-01,0.0
4,2004-01-05,0.0,0.0,0.0,0.0,2004-01-01,0.0


In [22]:
rain_df_monthly = rain_df.groupby('YearMonth')['Total'].sum().reset_index()
rain_df_monthly.head()

Unnamed: 0,YearMonth,Total
0,2004-01-01,111.0
1,2004-02-01,0.0
2,2004-03-01,0.0
3,2004-04-01,26.0
4,2004-05-01,906.0


In [24]:
def season_convert(dt):
    if 1<=dt.month<=2:
        season = 'Winter'
    elif 3<=dt.month<=5:
        season = 'Summer'
    elif 6<=dt.month<=9:
        season = 'Monsoon'
    else:
        season = 'Post-Monsoon'

    return season

In [25]:
rain_df_monthly['Season'] = rain_df_monthly['YearMonth'].apply(lambda x: season_convert(x))
rain_df_monthly.head()

Unnamed: 0,YearMonth,Total,Season
0,2004-01-01,111.0,Winter
1,2004-02-01,0.0,Winter
2,2004-03-01,0.0,Summer
3,2004-04-01,26.0,Summer
4,2004-05-01,906.0,Summer


In [26]:
px.bar(rain_df_monthly,
      x='YearMonth',
      y='Total',
      color = 'Season'
      )

In [30]:
rain_df['Year'] = pd.to_datetime(rain_df.Date.dt.year.astype(str), format='%Y')
rain_df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM,YearMonth,Total,Year
0,2004-01-01,0.0,0.0,0.0,0.0,2004-01-01,0.0,2004-01-01
1,2004-01-02,0.0,0.0,0.0,0.0,2004-01-01,0.0,2004-01-01
2,2004-01-03,0.0,0.0,0.0,0.0,2004-01-01,0.0,2004-01-01
3,2004-01-04,0.0,0.0,0.0,0.0,2004-01-01,0.0,2004-01-01
4,2004-01-05,0.0,0.0,0.0,0.0,2004-01-01,0.0,2004-01-01


In [31]:
rain_data = rain_df.groupby('Year')['Total'].sum().reset_index()

fig = px.bar(
    rain_data,
    x='Year',
    y='Total',
    title='Yearly Rainfall in Chennai',
    color='Total',
    color_continuous_scale=px.colors.sequential.Viridis,
)

fig.update_layout(
    title_font=dict(size=24, color='black', family='Arial, Bold'),
    xaxis_title='Year',
    yaxis_title='Total Rainfall (in mm)',
    margin=dict(l=40, r=40, t=80, b=40)
)

fig.show()

In [32]:
march_data = df.query('Date.dt.month == 3 and Date.dt.day == 1')

fig = px.bar(
    march_data,
    x='Date',
    y='Total',
    title='Total Water Availability at the Beginning of Summer',
    color_discrete_sequence=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']  # Example color sequence
)

fig.update_layout(
    title_font=dict(size=24, color='black', family='Arial, Bold'),
    xaxis_title='Date',
    yaxis_title='Total Water Availability (in mcft)',
    margin=dict(l=40, r=40, t=80, b=40)
)

fig.show()

In [33]:
X = df[['POONDI', 'CHOLAVARAM', 'REDHILLS', 'CHEMBARAMBAKKAM']]
Y = df['Total']

In [34]:
X.head()

Unnamed: 0,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM
0,3.9,0.0,268.0,0.0
1,3.9,0.0,268.0,0.0
2,3.9,0.0,267.0,0.0
3,3.9,0.0,267.0,0.0
4,3.8,0.0,267.0,0.0


In [35]:
Y.head()

Unnamed: 0,Total
0,271.9
1,271.9
2,270.9
3,270.9
4,270.8


In [36]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

print(f"Training set size: {X_train.shape[0]}")
print(f"Testing set size: {X_test.shape[0]}")

Training set size: 4945
Testing set size: 1237


In [37]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

linear_model = LinearRegression()
linear_model.fit(X_train, Y_train)
Y_pred_linear = linear_model.predict(X_test)

mse_linear = mean_squared_error(Y_test, Y_pred_linear)
r2_linear = r2_score(Y_test, Y_pred_linear)

print("\nLinear Regression:")
print(f"Mean Squared Error: {mse_linear:.2f}")
print(f"R-squared: {r2_linear:.2f}")


Linear Regression:
Mean Squared Error: 0.00
R-squared: 1.00


In [38]:
from sklearn.tree import DecisionTreeRegressor

tree_model = DecisionTreeRegressor()
tree_model.fit(X_train, Y_train)
Y_pred_tree = tree_model.predict(X_test)

mse_tree = mean_squared_error(Y_test, Y_pred_tree)
r2_tree = r2_score(Y_test, Y_pred_tree)

print("\nDecision Tree Regressor:")
print(f"Mean Squared Error: {mse_tree:.2f}")
print(f"R-squared: {r2_tree:.2f}")


Decision Tree Regressor:
Mean Squared Error: 3705.05
R-squared: 1.00


In [39]:
from sklearn.ensemble import RandomForestRegressor

rf_model = RandomForestRegressor()
rf_model.fit(X_train, Y_train)
Y_pred_rf = rf_model.predict(X_test)

mse_rf = mean_squared_error(Y_test, Y_pred_rf)
r2_rf = r2_score(Y_test, Y_pred_rf)

print("\nRandom Forest Regressor:")
print(f"Mean Squared Error: {mse_rf:.2f}")
print(f"R-squared: {r2_rf:.2f}")


Random Forest Regressor:
Mean Squared Error: 2159.34
R-squared: 1.00


In [40]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

num_samples = 1000
height, width, channels = 64, 64, 3
X = np.random.rand(num_samples, height, width, channels)
Y = np.random.randint(0, 2, size=(num_samples,))

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

print(f"Training set size: {X_train.shape[0]}")
print(f"Testing set size: {X_test.shape[0]}")

Training set size: 800
Testing set size: 200


In [41]:
model = Sequential()

# Convolutional Layer
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(height, width, channels)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second Convolutional Layer
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the layers
model.add(Flatten())

# Fully Connected Layer
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [42]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [43]:
history = model.fit(X_train, Y_train, epochs=15, batch_size=32, validation_data=(X_test, Y_test))

Epoch 1/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 221ms/step - accuracy: 0.4940 - loss: 0.9566 - val_accuracy: 0.4950 - val_loss: 0.6932
Epoch 2/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 212ms/step - accuracy: 0.5300 - loss: 0.6922 - val_accuracy: 0.5050 - val_loss: 0.6931
Epoch 3/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 212ms/step - accuracy: 0.6236 - loss: 0.6899 - val_accuracy: 0.4950 - val_loss: 0.6942
Epoch 4/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 241ms/step - accuracy: 0.5180 - loss: 0.6860 - val_accuracy: 0.5200 - val_loss: 0.6937
Epoch 5/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 178ms/step - accuracy: 0.7443 - loss: 0.6697 - val_accuracy: 0.5050 - val_loss: 0.6988
Epoch 6/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 255ms/step - accuracy: 0.5433 - loss: 0.6707 - val_accuracy: 0.5150 - val_loss: 0.7011
Epoch 7/15
[1m25/25[0m 

In [44]:
loss, accuracy = model.evaluate(X_test, Y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 0.4458 - loss: 1.0031
Test Accuracy: 0.46


In [45]:
import joblib
joblib.dump(rf_model, 'random_forest_model.pkl')

['random_forest_model.pkl']

In [46]:
import joblib
import numpy as np

# Load the saved model
loaded_rf_model = joblib.load('random_forest_model.pkl')

# Get input from the user for each feature
poondi = float(input("Enter the value for POONDI: "))
cholavaram = float(input("Enter the value for CHOLAVARAM: "))
redhills = float(input("Enter the value for REDHILLS: "))
chembarambakkam = float(input("Enter the value for CHEMBARAMBAKKAM: "))

# Create a 2D array for the input data
user_input = np.array([[poondi, cholavaram, redhills, chembarambakkam]])

# Make predictions using the loaded model
prediction = loaded_rf_model.predict(user_input)

print("Prediction from the loaded model:", prediction[0])

Enter the value for POONDI: 3.9
Enter the value for CHOLAVARAM: 0.0
Enter the value for REDHILLS: 268.0
Enter the value for CHEMBARAMBAKKAM: 0.0
Prediction from the loaded model: 271.7470000000004



X does not have valid feature names, but RandomForestRegressor was fitted with feature names

