In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [4]:
# Load the two source tables (rainfall first, then temperatures) from the
# local dataset folder; both are read with pandas' default CSV settings.
rainfall_df, temperature_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "dataset/kerala_rainfall.csv",
        "dataset/kerala_temperatures.csv",
    )
)

In [8]:
# Join the rainfall table (left) with the temperature table (right) on YEAR.
# Columns that exist in both inputs receive pandas' default suffixes:
# "_x" for the left (rainfall) side and "_y" for the right (temperature) side.
merged_df = rainfall_df.merge(temperature_df, on="YEAR")

# Trailing expression so the notebook renders the merged frame.
merged_df

Unnamed: 0,SUBDIVISION,YEAR,JAN_x,FEB_x,MAR_x,APR_x,MAY_x,JUN_x,JUL_x,AUG_x,...,AUG_y,SEP_y,OCT_y,NOV_y,DEC_y,ANNUAL,JAN-FEB,MAR-MAY,JUN-SEP,OCT-DEC
0,KERALA,1901,28.7,44.7,51.6,160.0,174.7,824.6,743.0,357.5,...,30.39,30.47,29.97,27.31,24.49,28.96,23.27,31.46,31.27,27.25
1,KERALA,1902,6.7,2.6,57.3,83.9,134.5,390.9,1205.0,315.8,...,30.73,29.80,29.12,26.31,24.04,29.22,25.75,31.76,31.09,26.49
2,KERALA,1903,3.2,18.6,3.1,83.6,249.7,558.6,1022.5,420.2,...,29.98,29.85,29.04,26.08,23.65,28.47,24.24,30.71,30.92,26.26
3,KERALA,1904,23.7,3.0,32.2,71.5,235.7,1098.2,725.5,351.8,...,30.09,30.04,29.20,26.36,23.63,28.49,23.62,30.95,30.66,26.40
4,KERALA,1905,1.2,22.3,9.4,105.9,263.3,850.2,520.5,293.6,...,30.68,30.12,30.67,27.52,23.82,28.30,22.25,30.00,31.33,26.57
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,KERALA,2013,3.9,40.1,49.9,49.3,119.3,1042.7,830.2,369.7,...,30.76,31.04,30.27,27.83,25.37,29.81,25.58,32.58,31.33,27.83
113,KERALA,2014,4.6,10.3,17.9,95.7,251.0,454.4,677.8,733.9,...,31.32,30.68,30.29,28.05,25.08,29.72,24.90,31.82,32.00,27.81
114,KERALA,2015,3.1,5.8,50.1,214.1,201.8,563.6,406.0,252.2,...,31.52,31.55,31.04,28.10,25.67,29.90,25.74,31.68,31.87,28.27
115,KERALA,2016,2.4,3.8,35.9,143.0,186.4,522.2,412.3,325.5,...,31.79,31.66,31.98,30.11,28.01,31.63,28.33,34.57,32.28,30.03


In [9]:
# Example criterion: if the next month's rainfall is above 100 mm, classify it
# as rainy (1), otherwise sunny (0).
#
# Each row holds January-December of one year, so the month that follows a
# row's feature window is January of the next row. "JAN_x" is the rainfall-side
# January column (the "_x" suffix marks columns from the left/rainfall frame).
#
# The earlier version compared "ANNUAL" with 100. In the displayed frame that
# column holds values of roughly 28-32, so the comparison was never true and
# every label came out as 0.
#
# NOTE(review): shift(-1) assumes rows are ordered by consecutive YEAR - confirm.
# NOTE(review): the JAN_x values visible in the displayed rows are all far below
# 100, so expect strongly imbalanced classes; revisit the threshold if needed.
RAINY_THRESHOLD_MM = 100

next_month_rainfall = merged_df["JAN_x"].shift(-1)

# Keep the label missing (NaN) where there is no following row, instead of
# letting the NaN comparison silently turn into a 0 label. A subsequent
# dropna() on the frame then discards that unlabeled row. Labels are stored as
# floats (0.0 / 1.0) so the NaN can be represented.
merged_df["NextMonthWeather"] = (
    (next_month_rainfall > RAINY_THRESHOLD_MM)
    .astype(float)
    .where(next_month_rainfall.notna())
)

In [10]:
# Remove rows that contain a missing value in any column.
merged_df = merged_df.dropna()

# Drop columns that are not used as features. errors="ignore" keeps this cell
# re-runnable: on a second execution the columns are already gone, and without
# it pandas would raise a KeyError.
merged_df = merged_df.drop(columns=["YEAR", "ANNUAL"], errors="ignore")

In [11]:
# Feature matrix: the twelve "_x" monthly columns followed by the twelve "_y"
# monthly columns, in calendar order within each group.
month_names = ["JAN", "FEB", "MAR", "APR", "MAY", "JUN",
               "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
feature_columns = [
    f"{month}{suffix}"
    for suffix in ("_x", "_y")
    for month in month_names
]
X = merged_df[feature_columns]

# Target vector: the binary label column built earlier in the notebook.
y = merged_df["NextMonthWeather"]

In [12]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Random forest with 100 trees; random_state is pinned so repeated fits on the
# same training data give the same model.
forest_settings = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**forest_settings)

# fit() is left as the trailing expression so the notebook displays the
# fitted estimator.
clf.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [14]:
# Score the fitted classifier on the held-out rows.
y_pred = clf.predict(X_test)

# Overall accuracy plus the per-class precision/recall/F1 text report.
accuracy, report = (
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        24

    accuracy                           1.00        24
   macro avg       1.00      1.00      1.00        24
weighted avg       1.00      1.00      1.00        24



In [15]:
# Define new data for prediction (replace with actual data).
# One row of 24 values laid out in the same order as the columns of X:
# the twelve "_x" monthly values first, then the twelve "_y" monthly values.
new_data = np.array([[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0]])

# The model was fitted on a DataFrame, so give it a DataFrame with the same
# column names. A bare ndarray makes scikit-learn warn that the input has no
# valid feature names and relies purely on positional order.
new_data_df = pd.DataFrame(new_data, columns=X.columns)

# Predict the next month's weather. predict() returns one label per input row;
# take the single element explicitly instead of relying on the truthiness of a
# one-element array (which raises as soon as more than one row is passed).
predicted_label = clf.predict(new_data_df)[0]
predicted_weather = "Rainy" if predicted_label == 1 else "Sunny"

print("Predicted Next Month's Weather:", predicted_weather)

Predicted Next Month's Weather: Sunny
