In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import joblib
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load dataset
churn = pd.read_csv("Churn_Modelling.csv")

In [4]:
# Print dataset info
print(churn.head())
print(churn.shape)

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

In [5]:

# Remove unnecessary columns
churn.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)

# Convert categorical variables to numerical
churn = pd.get_dummies(churn, drop_first=True)

# Split features and labels
X = churn.drop('Exited', axis=1).values
y = churn['Exited'].values


In [6]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Define the model
model = Sequential([
    Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Convert data to float32
X_train = np.array(X_train, dtype=np.float32)
y_train = np.array(y_train, dtype=np.float32)
X_test = np.array(X_test, dtype=np.float32)
y_test = np.array(y_test, dtype=np.float32)

In [9]:
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)


Epoch 1/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6969 - loss: 0.5959 - val_accuracy: 0.8200 - val_loss: 0.4202
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8120 - loss: 0.4529 - val_accuracy: 0.8400 - val_loss: 0.4019
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8151 - loss: 0.4305 - val_accuracy: 0.8444 - val_loss: 0.3884
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8335 - loss: 0.4151 - val_accuracy: 0.8512 - val_loss: 0.3773
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8263 - loss: 0.4112 - val_accuracy: 0.8537 - val_loss: 0.3670
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8416 - loss: 0.3881 - val_accuracy: 0.8506 - val_loss: 0.3642
Epoch 7/50
[1m200/200[0m 

In [10]:
# Custom data for prediction
custom_data = {
    'CreditScore': [650],
    'Geography_Germany': [0],
    'Geography_Spain': [1],
    'Gender_Male': [1],
    'Age': [30],
    'Tenure': [5],
    'Balance': [50000.0],
    'NumOfProducts': [2],
    'HasCrCard': [1],
    'IsActiveMember': [0],
    'EstimatedSalary': [75000.0]
}

In [11]:
custom_input_df = pd.DataFrame(custom_data)

In [12]:
# Ensure the custom input data has the same columns as the training data
for col in churn.drop('Exited', axis=1).columns:
    if col not in custom_input_df.columns:
        custom_input_df[col] = 0

In [13]:
# Reorder the custom input data to match the training data columns
custom_input_df = custom_input_df[churn.drop('Exited', axis=1).columns]

# Scale the custom input data
custom_input_scaled = scaler.transform(custom_input_df)



In [14]:
# Predict using the model
predictions = model.predict(custom_input_scaled)
print(predictions)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[[0.02937985]]
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8595 - loss: 0.3355
Test Accuracy: 86.15%


In [15]:
predictions

array([[0.02937985]], dtype=float32)

In [16]:
# Evaluate the model on the test set
model.evaluate(X_test, y_test)

# Predict on the test set
y_pred = model.predict(X_test)
preds = [1 if i > 0.5 else 0 for i in y_pred]
y_pred


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8595 - loss: 0.3355
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step


array([[0.04046677],
       [0.01963852],
       [0.10119758],
       ...,
       [0.5137037 ],
       [0.20239846],
       [0.2753581 ]], dtype=float32)

In [17]:
# Accuracy score
from sklearn.metrics import accuracy_score
test_acc = accuracy_score(y_test , preds)
print(f'Accuracy Score = {test_acc}')

Accuracy Score = 0.8615


In [18]:
joblib.dump(model,"model_coba.pkl")

['model_coba.pkl']

In [22]:
t = time.time()
export_path_sm = "./{}.h5".format(int(t))
print(export_path_sm)
model.save(export_path_sm)




./1719760031.h5


In [23]:
!ls {export_path_sm}

'ls' is not recognized as an internal or external command,
operable program or batch file.


In [25]:
import time
import tensorflow as tf

In [28]:
model.save(export_path_sm, save_format='tf')



In [29]:
reloaded_sm = tf.saved_model.load(export_path_sm)

OSError: SavedModel file does not exist at: ./1719760031.h5\{saved_model.pbtxt|saved_model.pb}

In [27]:
result_batch = model.predict(custom_input_df)

# Prediction from the loaded model
reload_sm_result_batch = reloaded_sm(tf.cast(custom_input_df, tf.float32), training=False).numpy()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


NameError: name 'reloaded_sm' is not defined

In [None]:
result_batch

array([[1.]], dtype=float32)

In [None]:
reload_sm_result_batch

array([[1.]], dtype=float32)

In [None]:
# try:
#   from google.colab import files
#   files.download('./model_coba.pkl')
# except ImportError:
#   pass