# HyperParams

### Importing Libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [3]:
pip install bayesian-optimization #Only needed for the first time

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bayesian-optimization
  Downloading bayesian-optimization-1.2.0.tar.gz (14 kB)
Building wheels for collected packages: bayesian-optimization
  Building wheel for bayesian-optimization (setup.py) ... [?25l[?25hdone
  Created wheel for bayesian-optimization: filename=bayesian_optimization-1.2.0-py3-none-any.whl size=11685 sha256=8ac6312e4b0d67bcd1a0a071a50b7e512e3f7fee4d3f2c6cf4e8e4967174ce1c
  Stored in directory: /root/.cache/pip/wheels/fd/9b/71/f127d694e02eb40bcf18c7ae9613b88a6be4470f57a8528c5b
Successfully built bayesian-optimization
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.2.0


In [4]:
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier as RFC

### Reading The Dataset

In [5]:
# Read the customer table from a CSV file located in the working directory.
data = pd.read_csv("Telecom_data.csv")
# Last expression of the cell: shows the first rows for a quick look at the columns.
data.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


### Preprocessing The Dataset

#### Converting TotalCharges To Numeric (Blank Entries Become 0)

In [6]:
# Convert TotalCharges to float, treating blank/whitespace-only entries as 0
# (presumably the column is read as text because some rows are blank - confirm
# against the CSV). Going through astype(str) keeps the cell re-runnable: once
# the column is already numeric, a per-value x.strip() raises AttributeError.
total_charges_text = data["TotalCharges"].astype(str).str.strip()
data["TotalCharges"] = total_charges_text.mask(total_charges_text == "", "0").astype(float)

#### Checking For Null Values In The Dataset

In [7]:
# Count missing entries per column (isna is the same check as isnull).
data.isna().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

#### Encoding The Categorical String Columns As Integers

In [8]:
# Text-valued columns that get replaced by integer codes.
cols = [
    "gender", "Partner", "Dependents", "PhoneService", "MultipleLines",
    "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection",
    "TechSupport", "StreamingTV", "StreamingMovies", "Contract",
    "PaperlessBilling", "PaymentMethod",
]
lbl_enc = LabelEncoder()
# One encoder is refit for every column in turn, so after this cell `lbl_enc`
# only remembers the classes of the last column in `cols`.
encoded_columns = {name: lbl_enc.fit_transform(data[name]) for name in cols}
for col, codes in encoded_columns.items():
    data[col] = codes
data.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,0,0,1,0,1,0,1,0,0,...,0,0,0,0,0,1,2,29.85,29.85,No
1,5575-GNVDE,1,0,0,0,34,1,0,0,2,...,2,0,0,0,1,0,3,56.95,1889.5,No
2,3668-QPYBK,1,0,0,0,2,1,0,0,2,...,0,0,0,0,0,1,3,53.85,108.15,Yes
3,7795-CFOCW,1,0,0,0,45,0,1,0,2,...,2,2,0,0,1,0,0,42.3,1840.75,No
4,9237-HQITU,0,0,0,0,2,1,0,1,0,...,0,0,0,0,0,1,2,70.7,151.65,Yes


#### Converting Churn Values To Binary

In [9]:
# Encode the target column: "Yes" -> 1, "No" -> 0.
map_dict = {"Yes": 1, "No": 0}
# Labels missing from map_dict pass through unchanged, so re-running this cell on
# the already-encoded column is a no-op; a plain .map(map_dict) would silently
# turn every label into NaN on the second run. astype(int) then raises if any
# remaining label cannot be interpreted as an integer.
data["Churn"] = data["Churn"].map(lambda label: map_dict.get(label, label)).astype(int)
# Only the last expression of a cell is displayed, so print the class counts.
print(data["Churn"].value_counts())

# Every column except the identifier and the target is used as a feature.
cols_to_use = [col for col in data.columns if col not in ["customerID", "Churn"]]
x = data[cols_to_use].values
y = data["Churn"].values
data.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,0,0,1,0,1,0,1,0,0,...,0,0,0,0,0,1,2,29.85,29.85,0
1,5575-GNVDE,1,0,0,0,34,1,0,0,2,...,2,0,0,0,1,0,3,56.95,1889.5,0
2,3668-QPYBK,1,0,0,0,2,1,0,0,2,...,0,0,0,0,0,1,3,53.85,108.15,1
3,7795-CFOCW,1,0,0,0,45,0,1,0,2,...,2,2,0,0,1,0,0,42.3,1840.75,0
4,9237-HQITU,0,0,0,0,2,1,0,1,0,...,0,0,0,0,0,1,2,70.7,151.65,1


### Splitting The Dataset For Training And Testing

In [10]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
train_X, test_X, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2020,
)

### Bayesian Optimization

#### Optimization Function

In [11]:
def rfc_cv(n_estimators, min_samples_split, max_features, data, targets):
    """Score one random-forest configuration with 4-fold cross-validation.

    Parameters
    ----------
    n_estimators, min_samples_split, max_features
        Passed unchanged to the ``RFC`` constructor.
    data, targets
        Features and labels handed to ``cross_val_score``.

    Returns
    -------
    The mean of the per-fold ``neg_log_loss`` scores.
    """
    forest_kwargs = {
        "n_estimators": n_estimators,
        "min_samples_split": min_samples_split,
        "max_features": max_features,
        # Fixed seed so repeated evaluations of the same configuration agree.
        "random_state": 2,
    }
    fold_scores = cross_val_score(
        RFC(**forest_kwargs), data, targets, scoring='neg_log_loss', cv=4
    )
    return fold_scores.mean()

def optimize_rfc(data, targets, n_iter=10, init_points=5):
    """Search random-forest hyper-parameters with Bayesian optimization.

    Parameters
    ----------
    data, targets
        Features and labels forwarded to ``rfc_cv`` for every evaluation.
    n_iter : int, default 10
        Number of guided optimization steps passed to ``optimizer.maximize``.
    init_points : int, default 5
        Number of initial random probes passed to ``optimizer.maximize``
        (5 is that method's own default, so existing calls behave as before).

    Returns
    -------
    The ``BayesianOptimization`` object after the search. Note that
    ``optimizer.max["params"]`` holds the raw float values proposed by the
    optimizer; the objective below truncates ``n_estimators`` and
    ``min_samples_split`` with ``int()`` before building the forest.
    """
    def rfc_crossval(n_estimators, min_samples_split, max_features):
        """Adapt the optimizer's float proposals to valid ``rfc_cv`` arguments."""
        return rfc_cv(
            # The optimizer proposes floats; these two are used as integers.
            n_estimators=int(n_estimators),
            min_samples_split=int(min_samples_split),
            # Clamp the proposed fraction into [1e-3, 0.999].
            max_features=max(min(max_features, 0.999), 1e-3),
            data=data,
            targets=targets,
        )

    optimizer = BayesianOptimization(
        f=rfc_crossval,
        pbounds={
            "n_estimators": (10, 250),
            "min_samples_split": (2, 25),
            "max_features": (0.1, 0.999),
        },
        random_state=1234,
        verbose=2
    )
    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    print("Final result:", optimizer.max)

    return optimizer
    
# Run the hyper-parameter search on the training split only.
result = optimize_rfc(data=train_X, targets=train_y)

|   iter    |  target   | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------
| [0m 1       [0m | [0m-0.4396  [0m | [0m 0.2722  [0m | [0m 16.31   [0m | [0m 115.1   [0m |
| [0m 2       [0m | [0m-0.4545  [0m | [0m 0.806   [0m | [0m 19.94   [0m | [0m 75.42   [0m |
| [0m 3       [0m | [0m-0.4405  [0m | [0m 0.3485  [0m | [0m 20.44   [0m | [0m 240.0   [0m |
| [0m 4       [0m | [0m-0.4816  [0m | [0m 0.8875  [0m | [0m 10.23   [0m | [0m 130.2   [0m |
| [0m 5       [0m | [0m-0.4632  [0m | [0m 0.7144  [0m | [0m 18.39   [0m | [0m 98.86   [0m |
| [0m 6       [0m | [0m-0.4588  [0m | [0m 0.8021  [0m | [0m 18.14   [0m | [0m 230.7   [0m |
| [0m 7       [0m | [0m-0.4721  [0m | [0m 0.9241  [0m | [0m 16.19   [0m | [0m 80.11   [0m |
| [0m 8       [0m | [0m-0.4482  [0m | [0m 0.8985  [0m | [0m 23.55   [0m | [0m 73.52   [0m |
| [0m 9       [0m | [0m-0.4546  [0m | [0m 0.8987  [0m 

#### The Best Set Of Parameters And Values

In [13]:
# Best entry found by the optimizer. The params are the raw float proposals; the
# objective applied int() to n_estimators and min_samples_split before fitting.
result.max

{'target': -0.42985013588025345,
 'params': {'max_features': 0.1,
  'min_samples_split': 25.0,
  'n_estimators': 237.63878480801307}}