# **Telco Customer Churn** (CRISP-DM Chapter 1)

## **Deployment**

In [None]:
# Load the Telco customer-churn CSV into a DataFrame.
df = pd.read_csv("/content/sample_data/WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [None]:
import joblib

# Column groups, one list per preprocessing strategy applied later in the notebook.

# Columns that are label-encoded (one LabelEncoder per column).
label_encode_cols = [
    "Partner",
    "Dependents",
    "PhoneService",
    "PaperlessBilling",
]

# Columns that are expanded with a one-hot encoder.
one_hot_encode_cols = [
    "MultipleLines",
    "InternetService",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
    "Contract",
    "PaymentMethod",
]

# Numeric columns that are rescaled with a min-max scaler.
min_max_scale_cols = [
    "tenure",
    "MonthlyCharges",
    "TotalCharges",
]

In [None]:
# Split the frame into target and features.
# The target is the 'Churn' column; the features are every other column except
# 'customerID', which is dropped together with 'Churn'.
y = df['Churn']
X = df.drop(columns=['customerID', 'Churn'])

In [None]:
# Debug aid: show the distinct raw labels present in the target before it is encoded.
print(
    "Unique values in 'Churn' before encoding:",
    y.unique(),
)

Unique values in 'Churn' before encoding: ['No' 'Yes']


In [None]:
# Encode the Target Variable
# A LabelEncoder maps the string labels of 'Churn' to integer class ids.
le_target = LabelEncoder()

# Fit on the raw df['Churn'] column instead of on `y`: `y` is overwritten with the
# encoded integers below, so fitting on `y` would make a second run of this cell
# re-fit the encoder on integers and lose the original string classes.
y = le_target.fit_transform(df['Churn'])

In [None]:
# Debug: print the distinct encoded class ids of the target.
# np.unique collapses the encoded array to its distinct values, matching the label
# in the message (printing `y` itself would show the whole array instead).
print("Unique values in 'Churn' after encoding:", np.unique(y))

Unique values in 'Churn' after encoding: [0 0 1 ... 0 1 0]


**Label Encoder for the Target Variable**

In [None]:
# Persist the fitted target LabelEncoder to disk with joblib.
target_encoder_file = 'label_encoder_target.pkl'
joblib.dump(le_target, target_encoder_file)
print("Label encoder for target saved.")

Label encoder for target saved.


**Handle Missing Values and Convert Data Types**

In [None]:
# Coerce the numeric feature columns to float. Any entry that cannot be parsed as a
# number (e.g. the blank ' ' strings the original code targeted) becomes NaN instead
# of raising, so the behaviour matches the stated intent of "non-numeric -> NaN".
X[min_max_scale_cols] = (
    X[min_max_scale_cols]
    .apply(pd.to_numeric, errors="coerce")
    .astype(float)
)

# Impute the resulting NaNs with each column's mean.
# NOTE(review): the mean is computed over the whole dataset (before the train/test
# split) and is not saved in the cells shown here -- confirm how inference code
# is expected to fill missing values.
X[min_max_scale_cols] = X[min_max_scale_cols].fillna(X[min_max_scale_cols].mean())

**Saving Label Encoders**

In [None]:
# **Label Encode Specified Columns**
# One LabelEncoder is fitted per column (the listed columns plus "gender") and kept
# in a dict keyed by column name.
label_encoders = {}
for col in label_encode_cols + ["gender"]:
    le = LabelEncoder()
    # Fit on the untouched df column rather than on X[col]: X[col] is overwritten
    # with integers here, so fitting on it would make a re-run of this cell re-fit
    # the encoder on integers and lose the original category labels.
    # X was derived from df via drop(columns=...), so rows are aligned.
    X[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [None]:
# Persist the dict of per-column label encoders to disk with joblib.
label_encoders_file = 'label_encoders.pkl'
joblib.dump(label_encoders, label_encoders_file)
print("Label encoders saved.")

Label encoders saved.


**Saving One-Hot Encoder**

In [None]:
# **One-Hot Encode Specified Columns**
# `sparse_output=False` returns a dense array. It replaces the `sparse` keyword,
# which was deprecated in scikit-learn 1.2 and removed in 1.4.
# `handle_unknown='ignore'` makes categories unseen during fit encode as all zeros
# at transform time instead of raising.
one_hot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
one_hot_encoded = one_hot_encoder.fit_transform(X[one_hot_encode_cols])



In [None]:
# Persist the fitted one-hot encoder to disk with joblib.
one_hot_encoder_file = 'one_hot_encoder.pkl'
joblib.dump(one_hot_encoder, one_hot_encoder_file)
print("One-hot encoder saved.")

One-hot encoder saved.


**Saving MinMax Scaler**

In [None]:
# **Min-Max Scale Specified Columns**
# Fit the scaler on the numeric columns, then transform those same columns.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(X[min_max_scale_cols])
scaled_numerical = min_max_scaler.transform(X[min_max_scale_cols])

In [None]:
# Persist the fitted min-max scaler to disk with joblib.
min_max_scaler_file = 'min_max_scaler.pkl'
joblib.dump(min_max_scaler, min_max_scaler_file)
print("Min-max scaler saved.")

Min-max scaler saved.


**Combine Processed Columns**

In [None]:
# Assemble the final feature matrix by stacking the pieces column-wise, in this
# fixed order: label-encoded columns (plus "gender"), scaled numeric columns,
# one-hot encoded columns.
feature_parts = (
    X[label_encode_cols + ["gender"]].values,
    scaled_numerical,
    one_hot_encoded,
)
X_processed = np.hstack(feature_parts)

In [None]:
# Display the combined feature matrix (NumPy abbreviates large arrays with "...").
X_processed

array([[1., 0., 0., ..., 0., 1., 0.],
       [0., 0., 1., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 1.],
       ...,
       [1., 1., 0., ..., 0., 1., 0.],
       [1., 0., 1., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 0.]])

In [None]:
# **Split Data into Training and Testing Sets**
# 20% of the rows are held out for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_processed,
    y,
    test_size=0.2,
    random_state=42,
)

**Saving the Trained Logistic Regression Model**


In [None]:
# **Train the Model**
# Logistic regression with max_iter raised to 1000 and a fixed random_state,
# fitted on the training split.
model = LogisticRegression(
    max_iter=1000,
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
# Persist the fitted model to disk with joblib and report the file name used.
model_file = 'logistic_regression_model.pkl'
joblib.dump(model, model_file)
print("model saved as {}.".format(model_file))

model saved as logistic_regression_model.pkl.



### **requirements.txt**





In [None]:
pip show scikit-learn

Name: scikit-learn
Version: 1.3.2
Summary: A set of python modules for machine learning and data mining
Home-page: http://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: /usr/local/lib/python3.10/dist-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: bigframes, fastai, imbalanced-learn, librosa, mlxtend, sklearn-pandas, yellowbrick


*   pandas
*   numpy==1.21.5
*   scikit-learn==1.3.2
*   gradio
*   joblib