In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np
import io
import joblib

# --- 1. DATASET LOADING ---

# Read the training data from CSV.
# Swap in your own path if the file lives elsewhere; on Windows prefer a raw
# string such as r"C:\path\to\file.csv" so backslashes are taken literally.
df = pd.read_csv('coastal_resilience_dataset.csv')

# --- 2. DATA PREPARATION FOR THE MODEL ---

# Only the two coordinate columns are used as predictors; 'Flood_Risk_Label'
# is the target.
# NOTE: a richer setup was sketched here earlier and is currently disabled:
# numerical columns (Elevation, Avg_Precipitation, Coastal_Distance,
# Historical_Floods) passed through, categorical columns (Land_Use, Soil_Type,
# Drainage_Capacity) one-hot encoded via a ColumnTransformer, all wrapped in a
# Pipeline in front of the same classifier.
features = ['Latitude', 'Longitude']
y = df.loc[:, 'Flood_Risk_Label']
X = df.loc[:, features]

# A 100-tree random forest with a fixed seed, so repeated runs on the same
# data build the same model.
model_pipeline = RandomForestClassifier(random_state=42, n_estimators=100)

# Fit on every available row. No hold-out split is made, so this script gives
# no estimate of generalisation performance; use train_test_split when one is
# needed.
model_pipeline.fit(X, y)

# --- 3. MODEL SAVING ---
# Persist the fitted estimator with joblib so it can be reloaded later
# without retraining.
model_filename = 'coastal_risk_model.joblib'
joblib.dump(model_pipeline, filename=model_filename)
print(f"\nModel successfully saved to {model_filename}")

# --- 4. PREDICTION FUNCTION ---

def predict_risk(lat_input, lon_input):
    """
    Return the model's flood-risk prediction for one coordinate pair.

    The coordinates are placed in a one-row DataFrame with the columns
    'Latitude' and 'Longitude' and passed to the module-level
    ``model_pipeline``. No nearest-neighbour lookup against the dataset is
    performed; the classifier is queried with the raw coordinates.

    Args:
        lat_input (float): The geographic latitude of the location.
        lon_input (float): The geographic longitude of the location.

    Returns:
        The first (and only) element of ``model_pipeline.predict(...)``,
        i.e. the predicted flood risk label for the location (e.g. 'Low').
    """
    # Column names are spelled out so the frame carries the 'Latitude' and
    # 'Longitude' labels that the prediction call receives.
    column_names = ['Latitude', 'Longitude']
    single_row = [lat_input, lon_input]
    query_frame = pd.DataFrame([single_row], columns=column_names)

    # predict() returns one label per input row; there is exactly one row.
    predicted_labels = model_pipeline.predict(query_frame)
    return predicted_labels[0]

# --- 5. USER INTERFACE AND EXECUTION ---

if __name__ == "__main__":
    # Interactive entry point: prompt for a coordinate pair, validate it, and
    # print the flood-risk label returned by predict_risk().
    print("Welcome to the Coastal Threat Risk Predictor!")
    print("Please enter the latitude and longitude for a coastal area to get a flood risk prediction.")

    # Keep the try body limited to parsing, so a ValueError raised while
    # predicting is not misreported as a typing mistake by the user.
    try:
        user_lat = float(input("Enter Latitude: "))
        user_lon = float(input("Enter Longitude: "))
    except ValueError:
        print("Invalid input. Please enter numerical values for latitude and longitude.")
    else:
        # float() accepts 'nan' and 'inf'. Chained comparisons are False for
        # NaN and fail for infinities, so this single range check rejects
        # non-finite values as well as coordinates outside the geographic range.
        coordinates_ok = (
            -90.0 <= user_lat <= 90.0
            and -180.0 <= user_lon <= 180.0
        )
        if not coordinates_ok:
            print(
                "Invalid input. Latitude must be between -90 and 90 "
                "and longitude between -180 and 180."
            )
        else:
            predicted_risk = predict_risk(user_lat, user_lon)

            # The model is queried with the entered coordinates directly; no
            # nearest-location lookup takes place, so the message says so.
            print(f"\nBased on the entered coordinates, the predicted flood risk is: {predicted_risk}")


Model successfully saved to coastal_risk_model.joblib
Welcome to the Coastal Threat Risk Predictor!
Please enter the latitude and longitude for a coastal area to get a flood risk prediction.

Based on the data for the nearest location, the predicted flood risk is: Low
