<a href="https://colab.research.google.com/github/Shibil-Basith/javalab/blob/main/rainfall_prediction_working.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd

# Define the API URL
url = "https://archive-api.open-meteo.com/v1/archive"

# Define request parameters
params = {
    "latitude": 10.8505,  # Kerala Latitude
    "longitude": 76.2711,  # Kerala Longitude
    "start_date": "1980-01-01",
    "end_date": "2024-12-31",
    "daily": [
        "temperature_2m_max",
        "temperature_2m_min",
        "precipitation_sum",
        "relative_humidity_2m_mean",  # ✅ Fixed humidity variable
        "wind_speed_10m_max",
        "surface_pressure_mean",  # ✅ Fixed pressure variable
        "cloud_cover_mean",
        "dew_point_2m_mean"
    ],
    "timezone": "Asia/Kolkata",
}


# Make API request
response = requests.get(url, params=params)
data = response.json()

# Check if the response contains 'daily' data
if "daily" in data:
    df = pd.DataFrame(data["daily"])
    df.to_csv("kerala_weather_data.csv", index=False)  # Save to CSV
    print(df.head())  # Display first few rows
else:
    print("Error fetching data:", data)  # Print error message

         time  temperature_2m_max  temperature_2m_min  precipitation_sum  \
0  1980-01-01                32.9                20.0                0.0   
1  1980-01-02                32.9                19.1                0.0   
2  1980-01-03                32.3                19.4                0.0   
3  1980-01-04                32.5                19.8                0.0   
4  1980-01-05                33.2                20.7                0.0   

   relative_humidity_2m_mean  wind_speed_10m_max  surface_pressure_mean  \
0                         67                 5.7                 1003.3   
1                         62                12.8                 1003.1   
2                         67                12.5                 1003.6   
3                         66                11.0                 1003.2   
4                         54                16.8                 1003.6   

   cloud_cover_mean  dew_point_2m_mean  
0                25               18.8  
1         

In [None]:
print(df.info())  # Shows column names, data types, and missing values
print(df.head())  # Displays the first 5 rows
print(df.describe())  # Shows summary statistics

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16437 entries, 0 to 16436
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   time                       16437 non-null  object 
 1   temperature_2m_max         16437 non-null  float64
 2   temperature_2m_min         16437 non-null  float64
 3   precipitation_sum          16437 non-null  float64
 4   relative_humidity_2m_mean  16437 non-null  int64  
 5   wind_speed_10m_max         16437 non-null  float64
 6   surface_pressure_mean      16437 non-null  float64
 7   cloud_cover_mean           16437 non-null  int64  
 8   dew_point_2m_mean          16437 non-null  float64
dtypes: float64(6), int64(2), object(1)
memory usage: 1.1+ MB
None
         time  temperature_2m_max  temperature_2m_min  precipitation_sum  \
0  1980-01-01                32.9                20.0                0.0   
1  1980-01-02                32.9                19.1    

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout

# Load Dataset
df = pd.read_csv("kerala_weather_data.csv")
df['time'] = pd.to_datetime(df['time'])  # Convert time column
df.set_index('time', inplace=True)  # Set time as index

# Handle missing values
df.fillna(method='ffill', inplace=True)

# Define Features & Target
features = df.drop(columns=['precipitation_sum'])  # Remove target column
target = df['precipitation_sum']  # Rainfall prediction

# Normalize Features
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

# Split Data into Train & Test
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, shuffle=False)

# Reshape for LSTM Input
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Build BiLSTM Model
model = Sequential([
    Bidirectional(LSTM(64, return_sequences=True), input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    Bidirectional(LSTM(32, return_sequences=False)),
    Dense(16, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# Train Model
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))

# Predictions
y_pred = model.predict(X_test)

# Evaluate Model
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R² Score: {r2:.4f}")

# SHAP Explainability
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test[:100])

# SHAP Summary Plot
shap.summary_plot(shap_values, features.iloc[:100])

# Plot Predictions
plt.figure(figsize=(10, 5))
plt.plot(y_test.values, label="Actual")
plt.plot(y_pred, label="Predicted")
plt.legend()
plt.title("Rainfall Prediction using BiLSTM")
plt.show()

Epoch 1/50


  df.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)


[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 21ms/step - loss: 68.7358 - val_loss: 107.2612
Epoch 2/50
[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 20ms/step - loss: 41.6675 - val_loss: 90.6273
Epoch 3/50
[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - loss: 38.6672 - val_loss: 86.3229
Epoch 4/50
[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - loss: 35.9954 - val_loss: 95.7832
Epoch 5/50
[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - loss: 33.7626 - val_loss: 87.4482
Epoch 6/50
[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 19ms/step - loss: 32.6223 - val_loss: 84.4182
Epoch 7/50
[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - loss: 29.0521 - val_loss: 82.1283
Epoch 8/50
[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 19ms/step - loss: 32.5131 - val_loss: 113.1437
Epoch 9/50
[1m82

TypeError: 'numpy.ndarray' object is not callable

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, GRU

# Load Dataset
df = pd.read_csv("kerala_weather_data.csv")
df['time'] = pd.to_datetime(df['time'])  # Convert time column
df.set_index('time', inplace=True)  # Set time as index

# Handle Missing Values
df.fillna(method='ffill', inplace=True)

# Feature Engineering
# Add lag features
for lag in range(1, 8):
    df[f'lag_{lag}'] = df['precipitation_sum'].shift(lag)

# Add rolling features
df['rolling_mean_3'] = df['precipitation_sum'].rolling(window=3).mean()
df['rolling_std_3'] = df['precipitation_sum'].rolling(window=3).std()
df['rolling_mean_7'] = df['precipitation_sum'].rolling(window=7).mean()

# Add seasonal features
df['month'] = df.index.month
df['day_of_year'] = df.index.day_of_year
df['hour'] = df.index.hour

# Drop rows with NaN after lagging
df.dropna(inplace=True)

# Define Features and Target
features = df.drop(columns=['precipitation_sum'])
target = df['precipitation_sum']

# Normalize Features
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

# Split Data into Train & Test
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, shuffle=False)

# Reshape for LSTM Input
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Build GRU-LSTM Hybrid Model
model = Sequential([
    Bidirectional(LSTM(128, return_sequences=True), input_shape=(X_train.shape[1], 1)),
    Dropout(0.3),
    Bidirectional(GRU(64, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32, return_sequences=False)),
    Dense(16, activation='relu'),
    Dense(1)
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

# Train Model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test),
                    callbacks=[
                        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
                        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)
                    ])

# Predictions
y_pred = model.predict(X_test)

# Evaluate Model
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R² Score: {r2:.4f}")

  df.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)


Epoch 1/100
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 113ms/step - loss: 67.7916 - val_loss: 92.7700 - learning_rate: 0.0010
Epoch 2/100
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 112ms/step - loss: 48.8438 - val_loss: 93.8191 - learning_rate: 0.0010
Epoch 3/100
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 115ms/step - loss: 48.4987 - val_loss: 90.8250 - learning_rate: 0.0010
Epoch 4/100
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 112ms/step - loss: 39.6668 - val_loss: 70.9190 - learning_rate: 0.0010
Epoch 5/100
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 112ms/step - loss: 26.4156 - val_loss: 56.8039 - learning_rate: 0.0010
Epoch 6/100
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 120ms/step - loss: 24.9902 - val_loss: 58.6650 - learning_rate: 0.0010
Epoch 7/100
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 116ms/step - loss: 21

TypeError: 'numpy.ndarray' object is not callable

In [None]:
model.save("rainfall_prediction_model.h5")



In [None]:
# SHAP Explainability
X_train_summary = X_train[:100]
explainer = shap.DeepExplainer(model, X_train_summary)
shap_values = explainer.shap_values(X_test[:100])

# SHAP Summary Plot
shap.summary_plot(shap_values[0], features.iloc[:100])

# Plot Predictions
plt.figure(figsize=(10, 5))
plt.plot(y_test.values, label="Actual")
plt.plot(y_pred, label="Predicted")
plt.legend()
plt.title("Rainfall Prediction using Hybrid GRU-LSTM")
plt.show()



StagingError: in user code:

    File "/usr/local/lib/python3.11/dist-packages/shap/explainers/_deep/deep_tf.py", line 269, in grad_graph  *
        x_grad = tape.gradient(out, shap_rAnD)

    LookupError: gradient registry has no entry for: shap_TensorListStack
