# In [3]:
# assignment2.py
# ------------------------------------------------------------
# Model type: Random Forest
# Task: Predict whether a purchase is a meal (1 = meal, 0 = not)
# ------------------------------------------------------------

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

# ------------------ Load Data ------------------
# Both CSVs are downloaded when the script runs: the training data first,
# then the data the fitted model will be asked to predict on.
train, test = (
    pd.read_csv(csv_url)
    for csv_url in (
        "https://github.com/dustywhite7/Econ8310/raw/master/AssignmentData/assignment3.csv",
        "https://github.com/dustywhite7/Econ8310/raw/master/AssignmentData/assignment3test.csv",
    )
)

# ------------------ Feature Engineering ------------------
def feature_engineer(df):
    """Return a copy of *df* with calendar features replacing the raw timestamp.

    Column names are stripped of surrounding whitespace. When a ``DateTime``
    column is present it is parsed (values that cannot be parsed become NaT)
    and three columns are derived from it: ``hour``, ``dow`` (day of week,
    Monday=0) and ``month``. The ``id`` and ``DateTime`` columns are then
    removed if they exist. The frame passed in is left untouched.
    """
    frame = df.copy()
    frame.columns = [name.strip() for name in frame.columns]

    if "DateTime" in frame.columns:
        # errors="coerce" keeps malformed timestamps from raising; they
        # surface as missing values in the derived columns instead.
        stamps = pd.to_datetime(frame["DateTime"], errors="coerce")
        frame = frame.assign(
            hour=stamps.dt.hour,
            dow=stamps.dt.dayofweek,
            month=stamps.dt.month,
        )

    # Drop the identifier and raw timestamp columns, but only those that are
    # actually present.
    unwanted = [name for name in ("id", "DateTime") if name in frame.columns]
    return frame.drop(columns=unwanted)

# Run the same feature engineering over both data sets.
train, test = (feature_engineer(frame) for frame in (train, test))

# ------------------ Split Data ------------------
# Features are every column except the target; the target is cast to int.
X = train.drop(columns=["meal"])
y = train["meal"].astype(int)

# Handle missing values: each gap is filled with the most frequent value of
# its column, learned from the training features.
imputer = SimpleImputer(strategy="most_frequent")
X_imputed = imputer.fit_transform(X)

# ------------------ Define model (must be top-level) ------------------
def model(random_state=42):
    """Build an unfitted random forest classifier with fixed hyperparameters.

    Args:
        random_state: Seed forwarded to the classifier so repeated runs
            produce the same forest. Defaults to 42.

    Returns:
        A new, unfitted ``RandomForestClassifier``.
    """
    forest_params = {
        "n_estimators": 300,
        "random_state": random_state,
        # -1 asks scikit-learn to use every available processor.
        "n_jobs": -1,
        # Reweight classes per tree, based on that tree's bootstrap sample.
        "class_weight": "balanced_subsample",
    }
    return RandomForestClassifier(**forest_params)

# ------------------ Fit model (must be top-level) ------------------
# Build the classifier and train it on the imputed features in one step.
# scikit-learn's fit() returns the estimator itself, so modelFit is the
# fitted forest.
modelFit = model().fit(X_imputed, y)

# ------------------ Make predictions (must be top-level) ------------------
# Give the test frame exactly the training feature columns, in training order:
# columns the test data lacks are created and filled with 0, and columns the
# model was not trained on are dropped.
test = test.reindex(columns=X.columns, fill_value=0)

# Reuse the imputer fitted on the training features, then predict.
X_test = imputer.transform(test)
pred = pd.Series(modelFit.predict(X_test), name="pred")

