In [1]:
import tensorflow as tf, pandas as pd, seaborn as sns, numpy as np, matplotlib.pyplot as plt
import warnings


In [2]:
from tensorflow.keras.layers import Normalization, Dense, InputLayer
from tensorflow.keras.losses import MeanSquaredError, Huber, MeanAbsoluteError
from tensorflow.keras.metrics import RootMeanSquaredError

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/ManonYa09/Machine-_Learning_G4/refs/heads/main/Dataset/4.%20WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [4]:
df['Churn'] = df['Churn'].replace({'Yes':1, 'No':0})

  df['Churn'] = df['Churn'].replace({'Yes':1, 'No':0})


In [5]:
df.head(2)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,0


In [6]:
from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

In [7]:
categorical_columns = []
numerical_columns = []
target = 'Churn'
excluded_columns = ['customerID', 'TotalCharges', 'gender']

for column in df.columns:
    if column in excluded_columns + [target]:
        continue
    unique_values = df[column].nunique()
    if unique_values <= 4:
        categorical_columns.append(column)
    else:
        numerical_columns.append(column)

In [8]:
def drop_columns(X):
    return X.drop(columns=excluded_columns)

In [9]:
preprocessor = ColumnTransformer([
    ('Scaling', StandardScaler(), numerical_columns),
    ('Ecoding', OneHotEncoder(), categorical_columns),
])
pipepline = Pipeline([
    ('drop', FunctionTransformer(drop_columns)),
    ('prepro', preprocessor)
])

In [10]:
x = df.drop(columns=target)
y = df[target]

In [11]:
X_transformed = pipepline.fit_transform(x)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42)

# Define the neural network model


In [13]:
input_shape = X_train.shape[1]

In [14]:
input_shape

43

In [109]:
  # Determine input shape dynamically
model = tf.keras.Sequential([
    InputLayer(input_shape=(input_shape,)),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(32, activation="sigmoid"),
    Dense(2, activation='sigmoid')
])



In [111]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [113]:
model.fit(X_train, y_train, epochs=300, validation_data=(X_test, y_test))


Epoch 1/300
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 622us/step - accuracy: 0.7668 - loss: 0.4873 - val_accuracy: 0.8070 - val_loss: 0.4036
Epoch 2/300
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350us/step - accuracy: 0.8025 - loss: 0.4183 - val_accuracy: 0.8027 - val_loss: 0.4098
Epoch 3/300
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348us/step - accuracy: 0.7985 - loss: 0.4161 - val_accuracy: 0.8027 - val_loss: 0.4180
Epoch 4/300
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 357us/step - accuracy: 0.8097 - loss: 0.4109 - val_accuracy: 0.8112 - val_loss: 0.4034
Epoch 5/300
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 345us/step - accuracy: 0.7962 - loss: 0.4234 - val_accuracy: 0.8148 - val_loss: 0.4001
Epoch 6/300
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344us/step - accuracy: 0.8083 - loss: 0.3971 - val_accuracy: 0.8062 - val_loss: 0.4033
Epoch 7/30

<keras.src.callbacks.history.History at 0x312fedc50>

In [79]:
model.predict(X_test)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 532us/step


array([[0.4799344 , 0.6762104 ],
       [0.47484723, 0.64561516],
       [0.48325592, 0.67417765],
       ...,
       [0.48351115, 0.6634803 ],
       [0.45274124, 0.65285677],
       [0.47008267, 0.6781703 ]], dtype=float32)

In [133]:
y_pred = model.predict(X_test)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256us/step


In [135]:
y_pred[0:, 0]>0.5

array([ True,  True,  True, ...,  True,  True,  True])

In [137]:
from sklearn.metrics import confusion_matrix

In [139]:
# Make predictions on the test data
y_pred = model.predict(X_test)

# Convert the probabilities to binary class predictions (0 or 1)
y_pred_class = y_pred.argmax(axis=1)  # Takes the class with the highest probability (0 or 1)

# Now you can generate and display the confusion matrix
cm = confusion_matrix(y_test, y_pred_class)

cm

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 324us/step


array([[879, 157],
       [184, 189]])

In [131]:
y_pred_class

array([1, 0, 0, ..., 0, 0, 0])

In [129]:
y_test

185     1
2715    0
3825    0
1807    1
132     0
       ..
6366    0
315     0
2439    0
5002    0
1161    1
Name: Churn, Length: 1409, dtype: int64

In [None]:
y_p

In [59]:
loss, accuracy = model.evaluate(X_test, y_test)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312us/step - accuracy: 0.7636 - loss: 0.5239


In [61]:
accuracy

0.7693399786949158

In [18]:
model = tf.keras.Sequential([
                             InputLayer(input_shape = (8,)),
                             ,
                             Dense(128, activation = "relu"),
                             Dense(128, activation = "relu"),
                             Dense(128, activation = "relu"),
                             Dense(2,activation = 'sigmoid')])

SyntaxError: invalid syntax (4173444228.py, line 3)