# Import Required Libraries

Import the necessary libraries for data processing, model training, and saving the model as a `.pkl` file.

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.impute import SimpleImputer
import pickle

# Load the Dataset

Load the rainfall dataset using pandas. Update the file path as needed for your environment.

In [None]:
# Read the rainfall CSV into a DataFrame and preview its first rows
csv_path = 'Rainfall.csv'
df = pd.read_csv(csv_path)
df.head()

# Preprocess the Data

Drop unnecessary columns, fill missing wind values, encode the `rainfall` target as 0/1, split the data into features and target, and then scale, impute, and normalize the features.

In [None]:
# Preprocess the loaded frame into model-ready features (X_final) and labels (y).

# Some CSV headers carry stray whitespace (e.g. '         winddirection').
# Look columns up by their stripped name instead of hard-coding the padding;
# the labels themselves are left untouched so the feature list derived from
# X.columns keeps matching the file.
labels_by_name = {str(label).strip(): label for label in df.columns}

# Drop unnecessary and highly correlated columns, if present
unused = [
    labels_by_name[name]
    for name in ('day', 'maxtemp', 'temparature', 'mintemp')
    if name in labels_by_name
]
df = df.drop(columns=unused)

# Handle missing wind values: most frequent direction, median speed
wind_dir = labels_by_name.get('winddirection')
if wind_dir is not None:
    modes = df[wind_dir].mode()
    if not modes.empty:  # mode() is empty when every value is missing
        df[wind_dir] = df[wind_dir].fillna(modes.iloc[0])
wind_speed = labels_by_name.get('windspeed')
if wind_speed is not None:
    df[wind_speed] = df[wind_speed].fillna(df[wind_speed].median())

# Locate the target column; fail with the same exception type a plain
# df['rainfall'] lookup would raise.
target = labels_by_name.get('rainfall')
if target is None:
    raise KeyError('rainfall')

# Encode the target: yes -> 1, no -> 0.
# Any non-numeric dtype is encoded (not only `object`), and surrounding
# whitespace / letter case are ignored so 'Yes ' does not silently become NaN.
if not pd.api.types.is_numeric_dtype(df[target]):
    raw_labels = df[target]
    cleaned = raw_labels.map(lambda v: v.strip().lower() if isinstance(v, str) else v)
    encoded = cleaned.map({'yes': 1, 'no': 0})
    unknown = raw_labels[encoded.isna() & raw_labels.notna()]
    if not unknown.empty:
        # Unmapped labels would otherwise surface later as NaN targets.
        raise ValueError(
            f"Unexpected values in 'rainfall': {sorted(set(map(str, unknown)))}"
        )
    df[target] = encoded

# Split features and target
X = df.drop(columns=[target])
y = df[target]

# Standardize each feature, fill any remaining gaps with the column mean,
# then rescale every row with Normalizer.
scaler = StandardScaler()
normalizer = Normalizer()
imputer = SimpleImputer(strategy='mean')

# NOTE(review): the transformers are fit on every row here, before the
# train/test split, so held-out rows influence the scaling and imputation
# statistics. Fitting on the training rows only would give a cleaner evaluation.
X_scaled = scaler.fit_transform(X)
X_imputed = imputer.fit_transform(X_scaled)
X_final = normalizer.fit_transform(X_imputed)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
split_parts = train_test_split(X_final, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Train the Random Forest Model

Train a RandomForestClassifier on the preprocessed data.

In [None]:
# Fit a random forest on the training split (seeded for reproducibility);
# fit() returns the estimator itself, which is displayed as the cell output
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
rf

# Save the Model as a .pkl File

Serialize the trained model and feature columns using pickle and save as a `.pkl` file.

In [None]:
# Save the model, its feature columns, and the fitted preprocessing steps as a .pkl file.
# The classifier was trained on features that went through
# scaler -> imputer -> normalizer, so those fitted transformers are stored
# alongside it; without them the artifact cannot score raw inputs.
# The original "model" and "columns" keys are unchanged.
model_data = {
    "model": rf,
    "columns": list(X.columns),
    "scaler": scaler,
    "imputer": imputer,
    "normalizer": normalizer,
}
# NOTE: pickle files can execute arbitrary code when loaded; only load this
# artifact from a trusted location.
with open("rainfall_model.pkl", "wb") as f:
    pickle.dump(model_data, f)

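The trained rainfall prediction model has been saved as `rainfall_model.pkl`. Because the model was trained on scaled, imputed, and normalized features, new inputs must go through the same preprocessing steps before they are passed to the model.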