In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder ,OneHotEncoder
import pickle

In [2]:
def load_data(file_path):
    """
    Read a CSV file and hand it back as a pandas DataFrame.

    Parameters:
    file_path (str): Location of the CSV file to read.

    Returns:
    pd.DataFrame or None: The parsed table, or None when reading fails
    for any reason (the error is printed rather than raised).
    """
    try:
        frame = pd.read_csv(file_path)
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Error loading data: {err}")
        return None
    else:
        return frame




In [3]:
# Load the raw dataset. load_data() prints the error and returns None when the
# read fails, so stop here with a clear message instead of letting the next
# line fail with an AttributeError on None.
data = load_data('data.csv')
if data is None:
    raise RuntimeError("Could not load 'data.csv'; see the error printed above.")

# Drop unnecessary columns
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])


In [4]:
# Encode the 'Gender' column as integer labels
lb = LabelEncoder()
data['Gender'] = lb.fit_transform(data['Gender'])

# One-hot encode 'Geography'; the encoder output is densified with .toarray() below
ohe = OneHotEncoder()
geo_encoder = ohe.fit_transform(data[['Geography']])

# Build the one-hot frame on data's own index so that a later column-wise
# concat lines up row-for-row even if data does not carry a default RangeIndex.
geo_df = pd.DataFrame(
    geo_encoder.toarray(),
    columns=ohe.get_feature_names_out(['Geography']),
    index=data.index,
)
print(geo_df.head())

   Geography_France  Geography_Germany  Geography_Spain
0               1.0                0.0              0.0
1               0.0                0.0              1.0
2               1.0                0.0              0.0
3               1.0                0.0              0.0
4               0.0                0.0              1.0


In [5]:
# Swap the raw 'Geography' column for its one-hot encoded columns
data_without_geo = data.drop(columns=['Geography'])
data = pd.concat([data_without_geo, geo_df], axis='columns')
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [6]:
# Persist the fitted encoders (label encoder and one-hot encoder) to disk
for pkl_path, fitted_encoder in (
    ('label_encoder.pkl', lb),
    ('one_hot_encoder.pkl', ohe),
):
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(fitted_encoder, pkl_file)
    


In [7]:
# Separate the feature matrix from the 'Exited' target column
y = data['Exited']
X = data.drop(columns=['Exited'])

# Hold out 20% of the rows for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: fit the scaler on the training rows only,
# then apply the same transformation to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted scaler
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)



In [8]:
# Shapes of the train/test feature and target splits, in that order
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((8000, 12), (2000, 12), (8000,), (2000,))

In [9]:
# ANN model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

# Seed the Python, NumPy and TensorFlow random generators before any weights are
# created, so weight initialisation starts from the same state on every
# Restart & Run All. Uses the same seed value as the train/test split.
tf.keras.utils.set_random_seed(42)

# Define the ANN model: three ReLU hidden layers (64 -> 32 -> 16 units) followed
# by a single sigmoid output unit. The input width is taken from X_train so it
# follows the number of feature columns.
model = Sequential(
    [
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ]
)






In [10]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                832       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dense_3 (Dense)             (None, 1)                 17        
                                                                 
Total params: 3457 (13.50 KB)
Trainable params: 3457 (13.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Adam optimizer with an explicit learning rate; binary cross-entropy loss,
# tracking accuracy as the reported metric
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [17]:
# Each run logs to its own timestamped TensorBoard directory under logs/fit/
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

# Early stopping on validation loss, asking Keras to restore the best weights.
# NOTE(review): patience=80 is close to the 100 epochs requested in model.fit,
# so stopping can only trigger when the best epoch comes very early - confirm
# this value is intended.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=80,
    restore_best_weights=True,
)



In [18]:

# Train the model; the test split is passed as validation data, so it is what
# the early-stopping callback monitors
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, tensorboard_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [19]:
# Save the trained model to an HDF5 (.h5) file
model.save('ann_model.h5')

  saving_api.save_model(


In [22]:
# Load the TensorBoard notebook extension

%load_ext tensorboard


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [23]:
# Open TensorBoard inside the notebook, reading runs from logs/fit
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 12980), started 0:00:43 ago. (Use '!kill 12980' to kill it.)

In [62]:
# Reload the fitted preprocessing objects and the trained model from disk.
# The .pkl files were written with pickle.dump, so they are read back with
# pickle.load (pickle is imported in the first cell) rather than another library.
# NOTE: unpickling can execute arbitrary code - only load files you created yourself.
with open('label_encoder.pkl', 'rb') as f:
    lb = pickle.load(f)
with open('one_hot_encoder.pkl', 'rb') as f:
    ohe = pickle.load(f)
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)

model = tf.keras.models.load_model('ann_model.h5')



In [None]:
# Sample input: a single record keyed by the raw (pre-encoding) column names
sample_input = {
    'CreditScore': 600,
    'Geography': 'France',
    'Gender': 'Male',
    'Age': 40,
    'Tenure': 3,
    'Balance': 60000,
    'NumOfProducts': 2,
    'HasCrCard': 1,
    'IsActiveMember': 1,
    'EstimatedSalary': 50000
}

sample_df = pd.DataFrame([sample_input])

# Apply the already-fitted encoders. Intermediate results get their own names
# so that the training-time `geo_df` is not overwritten by this one-row frame.
sample_df['Gender'] = lb.transform(sample_df['Gender'])
sample_geo = ohe.transform(sample_df[['Geography']]).toarray()
sample_geo_df = pd.DataFrame(
    sample_geo,
    columns=ohe.get_feature_names_out(['Geography']),
    index=sample_df.index,
)
sample_features = pd.concat(
    [sample_df.drop(columns=['Geography']), sample_geo_df],
    axis=1,
)

# Put the columns in the order the scaler was fitted with, so the result does
# not depend on the key order of the input dict. feature_names_in_ is only
# present when the scaler was fitted on a DataFrame, hence the guard.
expected_columns = getattr(scaler, 'feature_names_in_', None)
if expected_columns is not None:
    sample_features = sample_features[list(expected_columns)]

# Scale with the fitted scaler; `input_data` is the array passed to the model
input_data = scaler.transform(sample_features)

# Make prediction
prediction = model.predict(input_data)
exit_probability = prediction[0][0]

print(f"Prediction: {'Exited' if exit_probability > 0.5 else 'Not Exited'} with probability {exit_probability:.2f}")

[[0.05545291]]
Prediction: Not Exited with probability 0.06
