In [11]:
import flaml
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

from transformers import CustomerIdTransformer

In [12]:
df = pd.read_csv("telco/telco-churn-train.csv")

In [13]:
df.sample(10)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
397,6635-MYYYZ,Female,0,No,No,30,Yes,No,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,No,Bank transfer (automatic),85.35,2530.4,Yes
2369,7814-LEEVE,Female,0,Yes,No,1,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Month-to-month,Yes,Bank transfer (automatic),20.3,20.3,No
1602,1309-XGFSN,Male,1,Yes,Yes,52,Yes,Yes,DSL,No,...,Yes,No,Yes,Yes,One year,Yes,Electronic check,80.85,4079.55,No
5591,4816-OKWNX,Male,0,Yes,Yes,50,Yes,No,Fiber optic,No,...,Yes,Yes,Yes,Yes,One year,Yes,Bank transfer (automatic),103.4,5236.4,No
1112,5651-YLPRD,Female,0,Yes,Yes,32,Yes,No,Fiber optic,No,...,No,No,Yes,No,Month-to-month,Yes,Electronic check,86.1,2723.75,No
2791,6979-TNDEU,Female,0,No,No,8,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Month-to-month,No,Mailed check,19.2,156.85,No
850,4020-KIUDI,Male,0,Yes,Yes,6,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Month-to-month,No,Credit card (automatic),19.85,138.85,No
1079,4658-HCOHW,Female,0,Yes,Yes,21,Yes,No,Fiber optic,No,...,Yes,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,102.8,2110.15,Yes
1213,3470-OBUET,Female,0,Yes,Yes,67,Yes,Yes,DSL,Yes,...,No,Yes,Yes,No,Two year,No,Credit card (automatic),74.0,4868.4,No
4814,2003-CKLOR,Male,0,No,No,66,Yes,No,Fiber optic,No,...,Yes,No,Yes,Yes,Month-to-month,No,Electronic check,99.5,6710.5,Yes


In [14]:
X = df.drop(columns=["Churn"], axis=1)
y = df["Churn"]

In [15]:
def build_pipeline():
    id_columns = ["customerID"]
    categorical_features = list(
        X.loc[:, df.dtypes == 'object'].columns.values
    )
    categorical_features = [
        f for f in categorical_features if not f in id_columns
    ]
    id_transformer = (
        "customer_id",
        CustomerIdTransformer(id_columns),
        id_columns
    )
    encode_transformer = (
        "encoder",
        OneHotEncoder(sparse_output=False),
        categorical_features
    )
    preprocessor = ColumnTransformer(
        transformers=[
            id_transformer,
            encode_transformer,
        ],
        remainder='passthrough'
    )
    automl_settings = {
        "time_budget": 120,
        "metric": "accuracy",
        "task": "classification",
        "estimator_list": ["lgbm", "rf"],
        "custom_hp": {
            "n_estimators": {
                "domain": flaml.tune.uniform(20, 500)
            }
        },
        "verbose": -1
    }
    pipeline_settings = {
        f"automl__{key}": value for key, value in automl_settings.items()
    }
    automl = flaml.AutoML()
    pipeline = Pipeline(
        steps=[("preprocessor", preprocessor),
               ("automl", automl)]
    )
    return pipeline, pipeline_settings

In [16]:
pipeline, settings = build_pipeline()

In [17]:
pipeline.fit(X, y, **settings)

### Validation

In [19]:
print(f"F1: {f1_score(pipeline.predict(X), y, pos_label='Yes')}")

F1: 0.6601503759398495


In [8]:
pipeline.predict(X.iloc[[0]])

array(['No'], dtype=object)

In [9]:
joblib.dump(pipeline, "churn_pipeline.pkl")

['churn_lgbm.pkl']

## Gradio

In [15]:
import gradio as gr

In [17]:
inputs = [gr.Dataframe(row_count=(2, "dynamic"), col_count=(21, "dynamic"), label="Input Data", interactive=1)]
outputs = [gr.Dataframe(row_count=(2, "dynamic"), col_count=(1, "fixed"), label="Predictions", headers=["Churn"])]

model = joblib.load("churn_lgbm.pkl")

df = pd.read_csv("telco/telco-churn.csv")


def infer(input_dataframe):
    return pd.DataFrame(model.predict(input_dataframe))


gr.Interface(fn=infer, inputs=inputs, outputs=outputs, examples=[[df.head(2)]]).launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


