In [None]:
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from sklearn.impute import SimpleImputer\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# ------------------------------------------------------------\n",
    "# ChurnPredictor Class (Golden Solution)\n",
    "# ------------------------------------------------------------\n",
    "class ChurnPredictor:\n",
    "    \"\"\"Telco churn classifier: column clean-up, imputation, one-hot encoding, logistic regression.\n",
    "\n",
    "    Typical use is ``ChurnPredictor().fit(X, y).predict(X_new)``; ``predict``\n",
    "    returns churn probabilities rather than hard class labels.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        \"\"\"Initialize churn model with preprocessing + Logistic Regression.\"\"\"\n",
    "        self.target_name = \"Churn\"\n",
    "\n",
    "        self.numeric_features = [\n",
    "            \"SeniorCitizen\",\n",
    "            \"tenure\",\n",
    "            \"MonthlyCharges\",\n",
    "            \"TotalCharges\",\n",
    "        ]\n",
    "\n",
    "        # customerID is intentionally absent: it is unique per row, so one-hot\n",
    "        # encoding it adds one column per training customer, lets the model\n",
    "        # memorise the training labels, and contributes nothing for unseen IDs.\n",
    "        self.categorical_features = [\n",
    "            \"gender\",\n",
    "            \"Partner\",\n",
    "            \"Dependents\",\n",
    "            \"PhoneService\",\n",
    "            \"MultipleLines\",\n",
    "            \"InternetService\",\n",
    "            \"OnlineSecurity\",\n",
    "            \"OnlineBackup\",\n",
    "            \"DeviceProtection\",\n",
    "            \"TechSupport\",\n",
    "            \"StreamingTV\",\n",
    "            \"StreamingMovies\",\n",
    "            \"Contract\",\n",
    "            \"PaperlessBilling\",\n",
    "            \"PaymentMethod\",\n",
    "        ]\n",
    "\n",
    "        # Every column the pipeline consumes; anything else in the input\n",
    "        # (customerID included) is ignored by _prepare_X.\n",
    "        self.features = self.numeric_features + self.categorical_features\n",
    "\n",
    "        numeric_transformer = Pipeline(steps=[\n",
    "            (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
    "        ])\n",
    "\n",
    "        categorical_transformer = Pipeline(steps=[\n",
    "            (\"imputer\", SimpleImputer(strategy=\"most_frequent\")),\n",
    "            # Categories first seen at predict time encode as all zeros.\n",
    "            (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\")),\n",
    "        ])\n",
    "\n",
    "        self.preprocessor = ColumnTransformer(\n",
    "            transformers=[\n",
    "                (\"num\", numeric_transformer, self.numeric_features),\n",
    "                (\"cat\", categorical_transformer, self.categorical_features),\n",
    "            ],\n",
    "            remainder=\"drop\",\n",
    "        )\n",
    "\n",
    "        self.model = LogisticRegression(\n",
    "            max_iter=2000,\n",
    "            solver=\"lbfgs\"\n",
    "        )\n",
    "\n",
    "        self.pipeline = Pipeline(steps=[\n",
    "            (\"preprocessor\", self.preprocessor),\n",
    "            (\"model\", self.model),\n",
    "        ])\n",
    "\n",
    "    def _prepare_X(self, X):\n",
    "        \"\"\"Return a cleaned copy of X restricted to the expected feature columns.\n",
    "\n",
    "        Extra columns are discarded, expected columns that are absent are added\n",
    "        as all-NaN so the imputers can fill them, and the result is ordered as\n",
    "        ``self.features``. The caller's frame is not modified.\n",
    "        \"\"\"\n",
    "        # reindex rejects duplicate labels, so keep only the first occurrence\n",
    "        # of each column name before selecting.\n",
    "        deduped = X.loc[:, ~X.columns.duplicated()]\n",
    "\n",
    "        # One step: select expected columns, order them, NaN-fill missing ones.\n",
    "        Xc = deduped.reindex(columns=self.features)\n",
    "\n",
    "        # Telco exports often store numbers as text (TotalCharges uses blanks\n",
    "        # for brand-new customers); unparseable values become NaN so the\n",
    "        # median imputer can handle them.\n",
    "        for col in self.numeric_features:\n",
    "            Xc[col] = pd.to_numeric(Xc[col], errors=\"coerce\")\n",
    "\n",
    "        return Xc\n",
    "\n",
    "    def _encode_target(self, y):\n",
    "        \"\"\"Convert churn labels to a 0/1 integer Series.\n",
    "\n",
    "        Accepts a Series, a single-column DataFrame or any list-like. Numeric\n",
    "        and boolean labels pass through unchanged. Text labels are matched\n",
    "        case-insensitively after trimming whitespace: \"Yes\"/\"No\",\n",
    "        \"True\"/\"False\" and \"1\"/\"0\".\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if y is a multi-column DataFrame, or a text label\n",
    "                (missing values included) cannot be mapped to 0/1.\n",
    "        \"\"\"\n",
    "        if isinstance(y, pd.DataFrame):\n",
    "            if y.shape[1] != 1:\n",
    "                raise ValueError(\"y must have exactly one column.\")\n",
    "            # iloc keeps a Series even for a one-row frame, unlike squeeze().\n",
    "            y_series = y.iloc[:, 0]\n",
    "        elif isinstance(y, pd.Series):\n",
    "            y_series = y\n",
    "        else:\n",
    "            y_series = pd.Series(y)\n",
    "\n",
    "        # Covers int, float and bool targets.\n",
    "        if pd.api.types.is_numeric_dtype(y_series):\n",
    "            return y_series\n",
    "\n",
    "        label_map = {\"no\": 0, \"yes\": 1, \"false\": 0, \"true\": 1, \"0\": 0, \"1\": 1}\n",
    "        normalized = y_series.astype(str).str.strip().str.lower()\n",
    "        encoded = normalized.map(label_map)\n",
    "\n",
    "        unmapped = encoded.isna()\n",
    "        if unmapped.any():\n",
    "            bad = sorted(set(y_series[unmapped].astype(str)))\n",
    "            raise ValueError(\n",
    "                f\"Unrecognized churn labels {bad}; expected 'Yes'/'No' or 0/1.\"\n",
    "            )\n",
    "\n",
    "        return encoded.astype(int)\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        \"\"\"Fit the churn predictor model.\n",
    "\n",
    "        Args:\n",
    "            X: DataFrame of customer attributes; extra columns are ignored and\n",
    "                missing expected columns are imputed.\n",
    "            y: churn labels, either 0/1 or \"Yes\"/\"No\".\n",
    "\n",
    "        Returns:\n",
    "            self, so calls can be chained.\n",
    "        \"\"\"\n",
    "        Xp = self._prepare_X(X)\n",
    "        y_encoded = self._encode_target(y)\n",
    "        self.pipeline.fit(Xp, y_encoded)\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        \"\"\"Predict probability of churn (P(Churn='Yes')).\n",
    "\n",
    "        Returns a 1-D float array with one probability per row of X. A frame\n",
    "        with no rows yields an empty array instead of an estimator error.\n",
    "        \"\"\"\n",
    "        Xp = self._prepare_X(X)\n",
    "        if len(Xp) == 0:\n",
    "            return np.empty(0, dtype=float)\n",
    "        # Column 1 of predict_proba is the second (positive) class.\n",
    "        return self.pipeline.predict_proba(Xp)[:, 1]\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}