In [None]:
# --- Placeholder Generation Script ---
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

print("--- Step 1: Creating Sample In-Memory Data ---")

# A small synthetic dataset is built in memory so the script has no file
# dependencies. The column names MUST MATCH what your app.py expects.
columns = (
    'age', 'gender_encoded', 'scholarship', 'hypertension', 'diabetes',
    'alcoholism', 'handicap', 'sms_received', 'lead_time_days',
    'appointment_dow', 'same_day_appointment', 'has_chronic_condition',
    'total_conditions',
    'no_show',  # Our target variable
)

# One tuple per sample record; values are listed in the order of `columns`.
rows = [
    (35, 1, 0, 1, 0, 0, 0, 1, 14, 1, 0, 1, 1, 0),
    (22, 0, 1, 0, 0, 0, 0, 1, 5, 2, 0, 0, 0, 1),
    (68, 0, 0, 1, 1, 0, 0, 0, 2, 3, 0, 1, 2, 0),
    (19, 1, 0, 0, 0, 0, 0, 1, 25, 4, 0, 0, 0, 1),
    (45, 0, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 0),
    (58, 1, 0, 1, 0, 0, 0, 1, 3, 2, 0, 1, 1, 0),
    (29, 0, 0, 0, 0, 0, 0, 1, 10, 3, 0, 0, 0, 0),
    (7, 1, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0),
    (42, 0, 1, 0, 1, 0, 0, 1, 22, 1, 0, 1, 2, 1),
    (51, 1, 0, 0, 0, 0, 0, 1, 6, 2, 0, 0, 0, 0),
]

# Transpose the records into a {column name: list of values} mapping, which
# is the form handed to the DataFrame constructor.
data = {name: list(values) for name, values in zip(columns, zip(*rows))}
df = pd.DataFrame(data)
print("Sample DataFrame created successfully.")

print("\n--- Step 2: Preparing Data for Training ---")
# Features the model is trained on; the columns of X follow this order.
# This list MUST match the features expected by your app.
feature_names = [
    'age', 'gender_encoded', 'scholarship', 'hypertension', 'diabetes',
    'alcoholism', 'handicap', 'sms_received', 'lead_time_days',
    'appointment_dow', 'same_day_appointment', 'has_chronic_condition',
    'total_conditions'
]

X = df[feature_names]
y = df['no_show']

# Hold out 30% of the rows for testing. The split is stratified on the target
# because the sample is tiny and imbalanced (only 3 of the 10 rows are
# no-shows): a purely random split can leave the training set with almost no
# positive examples, while stratify=y keeps the class ratio similar in both
# parts. random_state pins the shuffle so reruns give the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

print("Data split into training and testing sets.")

print("\n--- Step 3: Creating and Fitting the Scaler and Model ---")
# Fit the scaler on the training split only, then apply it to that same split
# to produce the model's training input.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Build the classifier and train it on the scaled features.
# n_estimators=10 is small for a quick run; random_state fixes the seed.
model = RandomForestClassifier(n_estimators=10, random_state=42).fit(
    X_train_scaled, y_train
)

print("Scaler and Model have been trained successfully.")

print("\n--- Step 4: Saving Placeholder Files ---")

# The notebook is in a subfolder, so '../' targets the main project directory.
# Each entry is (destination path, object to pickle, confirmation message).
# Artifacts are written in this order: trained model, fitted scaler, then the
# list of feature names.
artifacts = (
    ('../trained_model.pkl', model,
     "✅ trained_model.pkl has been created in the main project folder."),
    ('../scaler.pkl', scaler,
     "✅ scaler.pkl has been created in the main project folder."),
    ('../feature_names.pkl', feature_names,
     "✅ feature_names.pkl has been created in the main project folder."),
)

for path, obj, message in artifacts:
    # Binary mode is required for pickle; the confirmation is printed only
    # after the file has been written and closed.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
    print(message)