In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Generate synthetic data
def create_synthetic_data(n_samples=100, seed=42):
    """Build a reproducible synthetic sanitation dataset.

    Every column is sampled independently of the others, so the frame is only
    useful for exercising the pipeline end to end.

    Args:
        n_samples: Number of rows to generate. Defaults to 100.
        seed: Value passed to ``np.random.seed`` before sampling. Note that
            this reseeds NumPy's global generator. Defaults to 42.

    Returns:
        A ``pandas.DataFrame`` with ``n_samples`` rows and the columns
        ``Population_Density``, ``Water_Access``, ``Sanitation_Method``,
        ``Health_Index``, ``Cost`` and ``Impact_Score``.
    """
    np.random.seed(seed)
    # The draw order below determines the generated values for a given seed;
    # keep it stable so the defaults keep producing the same data set.
    data = {
        'Population_Density': np.random.randint(500, 5000, n_samples),
        'Water_Access': np.random.randint(50, 100, n_samples),
        'Sanitation_Method': np.random.choice(
            ['Composting', 'Sewer System', 'Waterless Toilet', 'Pit Latrine'],
            n_samples,
        ),
        'Health_Index': np.random.randint(50, 100, n_samples),
        'Cost': np.random.randint(100, 5000, n_samples),
        'Impact_Score': np.random.randint(50, 100, n_samples),
    }
    return pd.DataFrame(data)

# Prepare the data (one-hot encoding for categorical features)
def preprocess_data(df):
    """Split a raw frame into model features and the regression target.

    ``Sanitation_Method`` is one-hot encoded with its first category dropped,
    and ``Impact_Score`` is separated out as the target. The input frame is
    not modified.

    Args:
        df: Frame containing at least ``Sanitation_Method`` and
            ``Impact_Score`` columns.

    Returns:
        A ``(X, y)`` tuple: the encoded feature frame without
        ``Impact_Score``, and the ``Impact_Score`` series.
    """
    encoded = pd.get_dummies(df, columns=['Sanitation_Method'], drop_first=True)
    target = encoded['Impact_Score']
    features = encoded.drop(columns=['Impact_Score'])
    return features, target

# Train the model
def train_model(X, y):
    """Fit a random-forest regressor on the given features and target.

    Args:
        X: Feature matrix to train on.
        y: Target values aligned with the rows of ``X``.

    Returns:
        The fitted ``RandomForestRegressor`` (100 trees, ``random_state=42``).
    """
    forest = RandomForestRegressor(random_state=42, n_estimators=100)
    # ``fit`` returns the estimator itself, so the fitted model is handed back.
    return forest.fit(X, y)

# Function to predict impact score for new user input
def predict_impact(model, user_input, X_columns):
    """Predict the impact score for a single user-supplied scenario.

    Args:
        model: Fitted estimator exposing ``predict``.
        user_input: Mapping with the raw feature values, including a
            ``Sanitation_Method`` string.
        X_columns: Feature column labels the model was trained on, in
            training order.

    Returns:
        The first (and only) element of ``model.predict`` for the encoded row.

    Notes:
        A sanitation method with no matching dummy column in ``X_columns``
        is encoded as all zeros in the dummy columns.
    """
    user_df = pd.DataFrame([user_input])

    # Do NOT pass drop_first=True here: with a single row there is only one
    # category, so dropping the first dummy would discard the user's choice
    # and every prediction would use the all-zero (baseline) encoding.
    user_df = pd.get_dummies(user_df, columns=['Sanitation_Method'])

    # Align with the training layout: keep exactly the training columns in the
    # training order, filling dummies that are absent from this row with 0 and
    # dropping any dummy the model never saw.
    user_df = user_df.reindex(columns=X_columns, fill_value=0)

    prediction = model.predict(user_df)
    return prediction[0]

# Generate synthetic data and train the model.
# These statements run at module level: `X` and `model` are kept as globals
# because the prediction step later in the file reads `X.columns` and `model`.
df = create_synthetic_data()
X, y = preprocess_data(df)
model = train_model(X, y)

# Sanitation methods offered to the user. Keep in sync with the categories
# generated in create_synthetic_data().
_SANITATION_METHODS = ('Composting', 'Sewer System', 'Waterless Toilet', 'Pit Latrine')


def _prompt_int(prompt):
    """Ask with *prompt* until the reply parses as an integer, then return it."""
    while True:
        reply = input(prompt)
        try:
            return int(reply)
        except ValueError:
            print(f"'{reply.strip()}' is not a whole number, please try again.")


def _prompt_sanitation_method(prompt):
    """Ask with *prompt* until the reply names a known sanitation method.

    Surrounding whitespace and letter case are ignored; the canonical spelling
    from ``_SANITATION_METHODS`` is returned so it matches the encoded columns.
    """
    canonical = {name.casefold(): name for name in _SANITATION_METHODS}
    while True:
        reply = input(prompt).strip()
        match = canonical.get(reply.casefold())
        if match is not None:
            return match
        print(f"'{reply}' is not one of: {', '.join(_SANITATION_METHODS)}.")


# Function to take user input
def get_user_input():
    """Interactively collect one scenario from standard input.

    Numeric fields are re-requested until they parse as integers, and the
    sanitation method is re-requested until it matches a known method.

    Returns:
        A dict with the keys ``Population_Density``, ``Water_Access``,
        ``Sanitation_Method``, ``Health_Index`` and ``Cost``.
    """
    population_density = _prompt_int("Enter population density (people per square km): ")
    water_access = _prompt_int("Enter water access (% of population): ")
    sanitation_method = _prompt_sanitation_method(
        "Enter sanitation method (Composting, Sewer System, Waterless Toilet, Pit Latrine): "
    )
    health_index = _prompt_int("Enter health index (0-100): ")
    cost = _prompt_int("Enter cost of solution (USD): ")

    return {
        'Population_Density': population_density,
        'Water_Access': water_access,
        'Sanitation_Method': sanitation_method,
        'Health_Index': health_index,
        'Cost': cost,
    }

# Take user input and predict impact score.
# This blocks on interactive input. `X.columns` is passed along so the
# prediction step knows which feature columns the training frame had.
user_input = get_user_input()
impact_score = predict_impact(model, user_input, X.columns)
print(f"The predicted sustainability impact score is: {impact_score:.2f}")

Enter population density (people per square km):  5657
