In [1]:
!pip install scikit-learn



In [2]:
import pandas as pd
import numpy as np

np.random.seed(42)  # reproducibility

def generate_donor_ngo_data_v2(n=1000):
    data = []
    for _ in range(n):
        distance = round(np.random.uniform(0.5, 20.0), 2)
        ngo_capacity = np.random.randint(10, 101)

        # Food quantity up to 2x NGO capacity
        food_quantity = np.random.randint(1, int(2 * ngo_capacity) + 1)

        food_type = np.random.choice(["veg", "nonveg"])
        food_category = np.random.choice(["cooked", "grocery"], p=[0.5, 0.5])
        accepts_nonveg = np.random.choice([0, 1], p=[0.4, 0.6])

        # Compatibility logic
        type_compatible = 1 if (food_type == "veg" or accepts_nonveg == 1) else 0

        # Expiry based on food category
        if food_category == "cooked":
            time_until_expiry = np.random.randint(1, 13)
        else:  # grocery
            time_until_expiry = np.random.randint(6, 37)

        match_successful = 1 if (
            distance < 10 and
            food_quantity <= 1.5 * ngo_capacity and
            type_compatible == 1 and
            time_until_expiry >= 4
        ) else 0

        data.append([
            distance, food_quantity, ngo_capacity, food_type, food_category,
            accepts_nonveg, type_compatible, time_until_expiry, match_successful
        ])

    columns = [
        "distance_km", "food_quantity", "ngo_capacity", "food_type",
        "food_category", "accepts_nonveg", "type_compatible",
        "time_until_expiry_hrs", "match_successful"
    ]

    return pd.DataFrame(data, columns=columns)

# Usage
df = generate_donor_ngo_data_v2(1000)
df.to_csv("donor_ngo_final_dataset_v2.csv", index=False)
print("✅ Dataset saved with shape:", df.shape)


✅ Dataset saved with shape: (1000, 9)


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import joblib

# Load dataset
df = pd.read_csv("donor_ngo_final_dataset_v2.csv")

# Drop the type_compatible column (automatically derived during generation)
X = df.drop(columns=["match_successful", "type_compatible"])
y = df["match_successful"]

# Categorical and numerical features
categorical_features = ["food_type", "food_category"]
numerical_features = [col for col in X.columns if col not in categorical_features]

# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ],
    remainder="passthrough"
)

# Full pipeline
clf = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(n_estimators=100, random_state=42))
])

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print("\n📊 Classification Report:\n")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(clf, 'donor_ngo_match_model.pkl')
print("✅ Model saved as donor_ngo_match_model.pkl")



📊 Classification Report:

              precision    recall  f1-score   support

           0       0.96      0.94      0.95       145
           1       0.86      0.89      0.88        55

    accuracy                           0.93       200
   macro avg       0.91      0.92      0.91       200
weighted avg       0.93      0.93      0.93       200

✅ Model saved as donor_ngo_match_model.pkl


In [4]:
import joblib
import pandas as pd

# Load saved model
model = joblib.load("donor_ngo_match_model.pkl")

# Example input – replace these values with dynamic ones as needed
donor_input = {
    "distance_km": 10.8,
    "food_quantity": 75,
    "ngo_capacity": 50,
    "food_type": "veg",
    "food_category": "cooked",
    "accepts_nonveg": 0,
    "time_until_expiry_hrs": 8
}

# Convert to DataFrame
input_df = pd.DataFrame([donor_input])

# Predict
prediction = model.predict(input_df)[0]
probability = model.predict_proba(input_df)[0][1]  # Confidence score for class 1

# Output result
if prediction == 1:
    print(f"✅ Match Successful (Confidence: {probability:.2f})")
else:
    print(f"❌ Not a Suitable Match (Confidence: {probability:.2f})")


❌ Not a Suitable Match (Confidence: 0.01)
