In [4]:
# Load the previously fitted estimator from disk; later cells call
# `model.predict(...)` on it.
# NOTE(review): joblib.load is pickle-based, so only load files from a trusted
# source. Presumably this is a fitted RandomForestRegressor trained on the same
# feature columns, in the same order, as the `features` list used below — confirm.
from joblib import load
model = load('random_forest_model.joblib')


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

In [5]:

# Load the raw observations.
# NOTE(review): assumes the CSV provides at least `date`, `lat`, `lon` and
# `rainfall` columns (the ones referenced below) — confirm against the file.
df = pd.read_csv('combined_rainfall_data.csv')

# Parse the date column strictly as ISO "YYYY-MM-DD"
df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")

# Calendar features derived from the timestamp
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['dayofweek'] = df['date'].dt.dayofweek

# Lag feature: rainfall of the previous observation at the same (lat, lon).
# shift(1) follows row order, so the rows are put in chronological order first;
# otherwise the "previous" value is whatever row happened to precede it in the
# CSV. The shifted Series keeps the original index, so the assignment aligns it
# back onto `df` without changing the frame's row order.
# NOTE(review): this is the previous *observation*; it is the previous day only
# if each location has one row per consecutive day — confirm.
df['rainfall_lag1'] = (
    df.sort_values('date', kind='mergesort')
      .groupby(['lat', 'lon'])['rainfall']
      .shift(1)
)

# Drop rows with a missing value in any column; this includes the first
# observation of every location, which has no lag value.
df = df.dropna()

# Prepare features and target
features = ['lat', 'lon', 'year', 'month', 'day', 'dayofweek', 'rainfall_lag1']
X = df[features]
y = df['rainfall']

# Split the data into training and testing sets (seeded, so reproducible).
# NOTE(review): the split is a random shuffle, not a chronological hold-out, and
# the score below is only meaningful if the loaded model was fitted on the
# training part of this exact split — confirm how the model file was produced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")

def predict_rainfall(date, lat, lon, prev_rainfall=None):
    """Predict rainfall for one date at one location with the loaded model.

    Builds a single-row frame with the columns listed in the global
    ``features`` and passes it to the global ``model``.

    Parameters
    ----------
    date : str or datetime-like
        Day to predict for; parsed with ``pd.to_datetime``.
    lat, lon : float
        Coordinates of the location. Matched against the rows of the global
        ``df`` with ``np.isclose`` rather than exact float equality.
    prev_rainfall : float, optional
        Value for the ``rainfall_lag1`` feature. When omitted, the most recent
        rainfall recorded in ``df`` for this location strictly before ``date``
        is used.

    Returns
    -------
    The first (only) element of ``model.predict`` for the constructed row.

    Raises
    ------
    ValueError
        If ``prev_rainfall`` is omitted and ``df`` holds no observation for the
        location, or none dated before ``date``.
    """
    target_date = pd.to_datetime(date)

    if prev_rainfall is None:
        at_location = df[np.isclose(df['lat'], lat) & np.isclose(df['lon'], lon)]
        if at_location.empty:
            raise ValueError(
                f"No observations found for lat={lat}, lon={lon}; "
                "pass prev_rainfall explicitly."
            )

        # Only use observations from before the requested date, so the lag never
        # comes from the future relative to the day being predicted.
        earlier = at_location[at_location['date'] < target_date]
        if earlier.empty:
            raise ValueError(
                f"No observations before {target_date.date()} for "
                f"lat={lat}, lon={lon}; pass prev_rainfall explicitly."
            )

        # Sort so "last" means latest in time, not last in file order.
        prev_rainfall = earlier.sort_values('date')['rainfall'].iloc[-1]

    # Create a DataFrame with the input data
    input_data = pd.DataFrame({
        'date': [target_date],
        'lat': [lat],
        'lon': [lon]
    })

    # Add time-based features (same derivation as for the training frame)
    input_data['year'] = input_data['date'].dt.year
    input_data['month'] = input_data['date'].dt.month
    input_data['day'] = input_data['date'].dt.day
    input_data['dayofweek'] = input_data['date'].dt.dayofweek

    # Add lag feature
    input_data['rainfall_lag1'] = prev_rainfall

    # Make prediction; select columns via `features` so their order matches
    # the order used for the evaluation data.
    prediction = model.predict(input_data[features])

    return prediction[0]

# Example usage: call predict_rainfall(date, lat, lon) — see the next cell.


Root Mean Squared Error: 13.350110325370766


In [7]:
# Example: predicted rainfall for 1 Jan 2020 at grid point (18.875, 73.125)
predict_rainfall(date='2020-01-01', lat=18.875, lon=73.125)

0.00015785830793901698