# Modelling

In [None]:
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

## Load Data

In [None]:
# Load the semicolon-delimited banking CSV into a DataFrame and preview
# its first three rows.
df = pd.read_csv(
    "../0_data/banking/bank-additional-full.csv",
    delimiter=";",
)
df.head(3)

In [None]:
# The label is the "y" column; the features are every other column except
# "duration" and "pdays", which are dropped together with the label.
y = df.loc[:, "y"]
X = df.drop(["duration", "pdays", "y"], axis="columns")

## Data Preparation

In [None]:
# Get column names per data type.
# Numerical columns are selected by dtype and every remaining column is
# treated as categorical. Taking the complement (instead of selecting
# "object") keeps text columns in the categorical group even when pandas
# stores them with a dedicated string dtype rather than object, so they are
# not silently left out of the ColumnTransformer.
categorical = list(X.select_dtypes(exclude="number").columns)
numerical = list(X.select_dtypes(include="number").columns)

In [None]:
# Show which feature columns were classified as categorical and which as
# numerical.
print("Categorical: ", categorical)
print("Numerical:   ", numerical)

In [None]:
# Preprocessing steps used by the ColumnTransformer:
# - a StandardScaler for the numerical columns,
# - a OneHotEncoder for the categorical columns, configured with
#   sparse_output=False so it produces dense output.
scaler = StandardScaler()
encoder = OneHotEncoder(sparse_output=False)

In [None]:
# Combine both preprocessing steps: each entry is (name, transformer, columns),
# so the encoder is applied to the categorical columns and the scaler to the
# numerical ones.
transformer = ColumnTransformer([
    ("encode_categorical", encoder, categorical),
    ("scale_numerical", scaler, numerical),
])

In [None]:
# Ask the transformer to return a pandas DataFrame, then fit it on X and
# transform X in one go; preview the first three transformed rows.
Xt = transformer.set_output(transform="pandas").fit_transform(X)
Xt.head(3)

## Linear Model

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Create the logistic regression model instance, capping the solver at
# 500 iterations.
lm = LogisticRegression(max_iter=500)

In [None]:
# Fit the model on the transformed features Xt and the labels y.
# NOTE: no train/test split is made in the cells shown here, so the model is
# fitted on every row of the data set.
lm.fit(Xt, y)

In [None]:
# Create predictions for the same rows (Xt) the model was fitted on.
predicted = lm.predict(Xt)

In [None]:
# Attach the predictions as a new "predicted" column on a copy of the
# original data (df itself is left untouched) and show the last three rows.
result = df.copy()
result["predicted"] = predicted
result.tail(3)

In [None]:
# Count how often each (actual label, predicted label) combination occurs,
# without sorting the result by count.
result.value_counts(subset=["y", "predicted"], sort=False)

In [None]:
# Put the fitted coefficients into a DataFrame:
# values come from lm.coef_, column labels from lm.feature_names_in_.
coeffs = pd.DataFrame(lm.coef_, columns=lm.feature_names_in_)

In [None]:
# Rank the features by coefficient, largest first: turn the feature names
# into the row index, round to three decimals, label the single value column
# "Coefficient" and sort on it in descending order.
(
    coeffs
    .T
    .round(3)
    .rename({0: "Coefficient"}, axis="columns")
    .sort_values(by="Coefficient", ascending=False)
)