In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# =======================
# Step 1: Build a synthetic dataset (15,000 rows)
# =======================
np.random.seed(42)  # fixed seed so every run generates the same data
n_samples = 15000

# NOTE: the random draws below must stay in this order, otherwise the
# seeded stream is consumed differently and the dataset changes.
data = dict(
    DayOfWeek=np.random.choice(["Mon","Tue","Wed","Thu","Fri","Sat","Sun"], n_samples),
    Hour=np.random.randint(0, 24, n_samples),   # hour of day, 0-23
    Holiday=np.random.choice([0,1], n_samples, p=[0.9,0.1]),  # ~10% holidays
)
df = pd.DataFrame(data)

# Patient count = base of 50, plus 5 per hour of day, plus uniform integer
# noise in [0, 30), minus 10 when the row is a holiday.
noise = np.random.randint(0, 30, n_samples)
df["NumPatients"] = 50 + 5 * df["Hour"] + noise - 10 * df["Holiday"]

# Binary label: 1 when the patient count exceeds 120, else 0
df["Crowded"] = df["NumPatients"].gt(120).astype(int)

# =======================
# Step 2: Preprocessing
# =======================
feature_cols = ["DayOfWeek", "Hour", "Holiday"]
y = df["Crowded"]

# One-hot encode the day label; drop_first removes the first (sorted)
# category, which then acts as the all-zeros baseline.
X = pd.get_dummies(df[feature_cols], drop_first=True)

# =======================
# Step 3: Train/Test Split (80% / 20%)
# =======================
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# =======================
# Step 4: Train Model
# =======================
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# =======================
# Step 5: Evaluate on the held-out split
# =======================
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)

# -----------------------
# Step 6: User Input Prediction
# -----------------------
def time_to_hour_slot(time_str):
    """Convert an 'HH:MM' string to its hour component (0-23).

    Args:
        time_str: Time of day such as "14:30". Only the text before the
            first ':' is parsed; the minutes are ignored.

    Returns:
        The hour as an int in the range 0-23.

    Raises:
        ValueError: If the hour part is not an integer, or if it lies
            outside 0-23.
    """
    hour = int(time_str.split(":")[0])
    # Reject impossible hours instead of passing them on to the model.
    if not 0 <= hour <= 23:
        raise ValueError(
            f"hour must be between 0 and 23, got {hour} from {time_str!r}"
        )
    return hour

def predict_crowd(day, time_str, holiday=0):
    """Predict whether a given day/time slot is crowded.

    Args:
        day: Day-of-week label in the same form used for training
            ("Mon", "Tue", ...).
        time_str: Time of day as an 'HH:MM' string; only the hour is used.
        holiday: 1 if the day is a holiday, 0 otherwise.

    Returns:
        "Crowded" if the model predicts class 1, otherwise "Not Crowded".
    """
    hour = time_to_hour_slot(time_str)
    sample = pd.DataFrame({
        "DayOfWeek": [day],
        "Hour": [hour],
        "Holiday": [holiday],
    })

    # Encode WITHOUT drop_first: a one-row frame has a single day category,
    # and drop_first would discard it, leaving every DayOfWeek_* column at 0
    # regardless of `day`. Reindexing to the training columns then drops the
    # baseline day's column and zero-fills the days that are absent.
    # A label that was never seen in training ends up all-zero as well,
    # i.e. it is treated like the baseline day.
    sample = pd.get_dummies(sample)
    sample = sample.reindex(columns=X.columns, fill_value=0)

    prediction = model.predict(sample)[0]
    return "Crowded" if prediction == 1 else "Not Crowded"

# -----------------------
# Step 7: Suggest Best Time
# -----------------------
def suggest_best_times(day, holiday=0, open_hour=8, close_hour=20, top_k=3):
    """Return the hours of a day with the lowest predicted crowd probability.

    Args:
        day: Day-of-week label in the same form used for training
            ("Mon", "Tue", ...).
        holiday: 1 if the day is a holiday, 0 otherwise.
        open_hour: First hour to consider (inclusive).
        close_hour: Last hour to consider (inclusive).
        top_k: Number of rows to return.

    Returns:
        A DataFrame with columns "Hour" and "CrowdProb" (the model's
        probability of the crowded class), sorted by ascending "CrowdProb"
        and truncated to the first `top_k` rows.
    """
    hours = list(range(open_hour, close_hour + 1))
    slots = pd.DataFrame({
        "DayOfWeek": [day] * len(hours),
        "Hour": hours,
        "Holiday": [holiday] * len(hours),
    })

    # Encode WITHOUT drop_first: every row shares the same day, so
    # drop_first would discard that single category and leave all
    # DayOfWeek_* columns at 0. Reindexing to the training columns drops
    # the baseline day's column and zero-fills the days that are absent.
    slots = pd.get_dummies(slots)
    slots = slots.reindex(columns=X.columns, fill_value=0)

    preds = model.predict_proba(slots)[:, 1]  # probability of crowded
    results = pd.DataFrame({"Hour": hours, "CrowdProb": preds})
    # Stable sort so hours with equal probability stay in chronological order.
    results = results.sort_values(by="CrowdProb", kind="stable")

    # Keep the top K least crowded times.
    return results.head(top_k)

# -----------------------
# Example Usage
# -----------------------
# Fixed sample inputs for a quick demonstration run.
user_day = "Mon"
user_time = "14:30"
user_holiday = 0

print(f"\nPrediction for {user_day} {user_time}: {predict_crowd(user_day, user_time, user_holiday)}")
# suggest_best_times (plural) is the helper defined in this file; it returns
# a DataFrame of the least-crowded hours with columns Hour and CrowdProb.
print("Best time to visit:", suggest_best_times(user_day, user_holiday))


if __name__ == "__main__":
    # Interactive entry point: read the day, time and holiday flag from stdin.
    day = input("Enter day of week (Mon/Tue/Wed/...): ")
    time_str = input("Enter time in HH:MM format (e.g., 14:30): ")
    # int() raises ValueError if the reply is not an integer.
    holiday = int(input("Is it a holiday? (0 = No, 1 = Yes): "))

    result = predict_crowd(day, time_str, holiday)
    # suggest_best_times (plural) is the helper defined in this file; it
    # returns a DataFrame with columns Hour and CrowdProb.
    best = suggest_best_times(day, holiday)

    print(f"\nPrediction for {day} {time_str}: {result}")
    print(f"Best time to visit on {day}: {best}")

Model Accuracy: 0.933

Prediction for Mon 14:30: Crowded


NameError: name 'suggest_best_time' is not defined