<a href="https://colab.research.google.com/github/Sai1116/MathMinds_E101/blob/main/hack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [48]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import random

np.random.seed(42)

zones = [
    "Gandhipuram",
    "Peelamedu",
    "RS_Puram",
    "Saibaba_Colony",
    "Ukkadam",
    "Singanallur"
]

rows = []
start_date = datetime(2024, 3, 1)

for _ in range(1000):

    date = start_date + timedelta(days=np.random.randint(0, 14))
    hour = np.random.randint(6, 23)

    pickup_zone = random.choice(zones)
    dropoff_zone = random.choice(zones)

    zone_demand_level = random.choice(["Low", "Medium", "High"])
    zone_worker_density = random.choice(["Low", "Medium", "High"])

    traffic_index = round(np.random.uniform(0.9, 1.6), 2)

    trip_distance_km = round(np.random.gamma(2.0, 2.5), 2)
    trip_duration_min = round(trip_distance_km * traffic_index * np.random.uniform(3, 5), 1)
    pickup_delay_min = round(np.random.uniform(2, 10) * traffic_index, 1)

    worker_rating = round(np.random.normal(4.6, 0.2), 2)
    worker_rating = min(max(worker_rating, 4.0), 5.0)
    acceptance_rate = round(np.random.uniform(0.6, 0.95), 2)

    base_fare = round(40 + trip_distance_km * 4, 2)
    surge_multiplier = 1.0
    incentive_bonus = 0

    if zone_demand_level == "High" and hour in [8, 9, 18, 19, 20]:
        surge_multiplier = round(np.random.uniform(1.2, 1.7), 2)
        incentive_bonus = random.choice([10, 20, 30])

    # ðŸš¨ Undocumented platform change (policy shock)
    if date >= datetime(2024, 3, 10) and pickup_zone in ["Ukkadam", "Singanallur"]:
        surge_multiplier *= 0.8  # silent cut

    total_fare = round(base_fare * surge_multiplier + incentive_bonus, 2)

    rows.append([
        date.strftime("%Y-%m-%d"),
        hour,
        pickup_zone,
        dropoff_zone,
        trip_distance_km,
        trip_duration_min,
        pickup_delay_min,
        traffic_index,
        worker_rating,
        acceptance_rate,
        zone_worker_density,
        zone_demand_level,
        base_fare,
        surge_multiplier,
        incentive_bonus,
        total_fare
    ])

columns = [
    "date", "hour", "pickup_zone", "dropoff_zone",
    "trip_distance_km", "trip_duration_min", "pickup_delay_min",
    "traffic_index", "worker_rating", "acceptance_rate",
    "zone_worker_density", "zone_demand_level",
    "base_fare", "surge_multiplier", "incentive_bonus", "total_fare"
]

df = pd.DataFrame(rows, columns=columns)
df.to_csv("gig_dataset.csv", index=False)


In [49]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

df = pd.read_csv("gig_dataset.csv")

# Encode categories
for col in ["pickup_zone", "zone_worker_density", "zone_demand_level"]:
    df[col] = LabelEncoder().fit_transform(df[col])

# âœ… BEHAVIOR-BASED TARGET (not rule-based)
df["assigned"] = (
    (df["pickup_delay_min"] < 6) &
    (df["acceptance_rate"] > 0.75)
).astype(int)

X = df[
    ["hour", "pickup_zone", "zone_demand_level",
     "zone_worker_density", "traffic_index", "pickup_delay_min"]
]
y = df["assigned"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

assignment_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=7,
    random_state=42
)

assignment_model.fit(X_train, y_train)
y_pred = assignment_model.predict(X_test)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.94      0.84      0.89       156
           1       0.58      0.80      0.67        44

    accuracy                           0.83       200
   macro avg       0.76      0.82      0.78       200
weighted avg       0.86      0.83      0.84       200



In [50]:
# =========================
# MODEL 2: INCENTIVE PREDICTION (MINIMAL FIX)
# =========================

from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# -------------------------
# 1. Load dataset
# -------------------------
df = pd.read_csv("gig_dataset.csv")

# -------------------------
# 2. Encode categorical features (same as Model 1)
# -------------------------
from sklearn.preprocessing import LabelEncoder

cat_cols = ["pickup_zone", "zone_worker_density", "zone_demand_level"]
for col in cat_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# -------------------------
# 3. FIX: Create incentive target (MISSING EARLIER)
# -------------------------
df["incentive_active"] = (df["incentive_bonus"] > 0).astype(int)

# -------------------------
# 4. Features (UNCHANGED)
# -------------------------
X = df[
    [
        "hour",
        "pickup_zone",
        "zone_demand_level",
        "zone_worker_density",
        "traffic_index",
        "pickup_delay_min",
    ]
]

y_incentive = df["incentive_active"]

# -------------------------
# 5. Train-test split (UNCHANGED)
# -------------------------
X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(
    X,
    y_incentive,
    test_size=0.2,
    random_state=42,
    stratify=y_incentive
)

# -------------------------
# 6. Base Random Forest (UNCHANGED)
# -------------------------
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=6,
    min_samples_leaf=10,   # reduces overconfidence
    class_weight="balanced",
    random_state=42
)

# -------------------------
# 7. Probability calibration (UNCHANGED)
# -------------------------
incentive_model = CalibratedClassifierCV(
    rf,
    method="isotonic",
    cv=3
)

incentive_model.fit(X_train_i, y_train_i)

# -------------------------
# 8. Threshold tuning (UNCHANGED)
# -------------------------
probs = incentive_model.predict_proba(X_test_i)[:, 1]

THRESHOLD = 0.45
y_pred_i = (probs >= THRESHOLD).astype(int)

# -------------------------
# 9. Evaluation (UNCHANGED)
# -------------------------
print("\nFINAL CALIBRATED INCENTIVE MODEL RESULTS")
print(classification_report(y_test_i, y_pred_i))



FINAL CALIBRATED INCENTIVE MODEL RESULTS
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       180
           1       1.00      0.95      0.97        20

    accuracy                           0.99       200
   macro avg       1.00      0.97      0.99       200
weighted avg       1.00      0.99      0.99       200



In [51]:
df["date"] = pd.to_datetime(df["date"])
df["pay_per_km"] = df["total_fare"] / df["trip_distance_km"]

zone_day = (
    df.groupby(["pickup_zone", "date"])
    .agg({
        "total_fare": "mean",
        "pay_per_km": "mean",
        "surge_multiplier": "mean"
    })
    .reset_index()
    .sort_values(["pickup_zone", "date"])
)

WINDOW = 5

zone_day["baseline_fare"] = (
    zone_day.groupby("pickup_zone")["total_fare"]
    .transform(lambda x: x.rolling(WINDOW, min_periods=3).mean())
)

zone_day["fare_drop"] = (
    (zone_day["baseline_fare"] - zone_day["total_fare"])
    / zone_day["baseline_fare"]
) > 0.15

zone_day["platform_adaptation"] = zone_day["fare_drop"]

print(zone_day[zone_day["platform_adaptation"]].head())


    pickup_zone       date  total_fare  pay_per_km  surge_multiplier  \
37            2 2024-03-10   53.886154   23.611219             1.000   
67            4 2024-03-12   43.850833   18.257669             0.800   
79            5 2024-03-10   47.783000   11.664423             0.800   
80            5 2024-03-11   48.950625   19.226014             0.824   

    baseline_fare  fare_drop  platform_adaptation  
37      66.268731       True                 True  
67      54.378813       True                 True  
79      63.033584       True                 True  
80      61.450376       True                 True  


In [52]:
cd /content/PROJECT

/content/PROJECT


In [53]:
! python /content/PROJECT/prepare_ui_data.ipynb

zone_ui_data.csv created
             zone  ...                                        explanation
0     Gandhipuram  ...  Demand is Low, resulting in Low task availabil...
1       Peelamedu  ...  Demand is High, resulting in High task availab...
2        RS_Puram  ...  Demand is Medium, resulting in Medium task ava...
3  Saibaba_Colony  ...  Demand is Low, resulting in Low task availabil...
4     Singanallur  ...  Demand is High, resulting in High task availab...
5         Ukkadam  ...  Demand is Low, resulting in Low task availabil...

[6 rows x 11 columns]


In [None]:
! python /content/PROJECT/app.py

 * Serving Flask app 'app'
 * Debug mode: on
 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
 * Restarting with watchdog (inotify)
 * Debugger is active!
 * Debugger PIN: 462-506-135
