In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import numpy as np

# Load datasets
flood_data = pd.read_csv('/content/Historical_flood.csv')
rainfall_data = pd.read_csv('/content/rainfall_demo.csv')
river_levels = pd.read_csv('/content/river_demo.csv')
urban_data = pd.read_csv('/content/urban.csv')

# Renaming columns to avoid duplication during merge
flood_data = flood_data.rename(columns={'date': 'flood_date', 'location': 'flood_location'})
rainfall_data = rainfall_data.rename(columns={'date': 'rain_date', 'location': 'rain_location'})
river_levels = river_levels.rename(columns={'date': 'river_date', 'location': 'river_location'})

# Merging the datasets on the renamed columns
data = flood_data.merge(rainfall_data, left_on=['flood_date', 'flood_location'], right_on=['rain_date', 'rain_location'])
data = data.merge(river_levels, left_on=['flood_date', 'flood_location'], right_on=['river_date', 'river_location'])
data = data.drop(columns=['rain_date', 'rain_location', 'river_date', 'river_location'])
data = data.merge(urban_data, left_on='flood_location', right_on='location')
data = data.drop(columns=['location'])
data = data.rename(columns={'flood_date': 'date', 'flood_location': 'location'})

# Create new features
data['rolling_rainfall'] = data['rainfall'].rolling(window=3).mean()
data['river_level_change'] = data['river_level'].diff()
data = data.dropna()  # Drop rows with NaN values

# Define features and target variable
features = ['rainfall', 'river_level', 'urbanization_index', 'rolling_rainfall', 'river_level_change']
target = 'flood'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

# Train a Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))

# Extend the dataset for future dates
future_dates = pd.date_range(start='2023-01-08', periods=30, freq='D')  # Example future dates
future_data = pd.DataFrame({
    'date': future_dates,
    'location': ['Lagos Mainland'] * len(future_dates),
    'rainfall': np.random.randint(0, 100, len(future_dates)),  # Simulate future rainfall
    'river_level': np.random.randint(4, 10, len(future_dates)),  # Simulate future river levels
    'urbanization_index': [80] * len(future_dates)
})

# Preprocess future data similar to training data
future_data['rolling_rainfall'] = future_data['rainfall'].rolling(window=3).mean().fillna(method='backfill')
future_data['river_level_change'] = future_data['river_level'].diff().fillna(0)

# Predict flood events for future dates
future_data['flood_prediction'] = model.predict(future_data[features])

# Identify likely flood dates
likely_flood_dates = future_data[future_data['flood_prediction'] == 1]['date']

print("Likely flood dates:")
print(likely_flood_dates)

# Save predictions to a CSV file
future_data.to_csv('future_flood_predictions.csv', index=False)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

Likely flood dates:
0    2023-01-08
2    2023-01-10
8    2023-01-16
11   2023-01-19
15   2023-01-23
16   2023-01-24
17   2023-01-25
18   2023-01-26
19   2023-01-27
22   2023-01-30
25   2023-02-02
26   2023-02-03
27   2023-02-04
28   2023-02-05
Name: date, dtype: datetime64[ns]
