In [8]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from src.exception import CustomException
from sklearn.model_selection import train_test_split

In [18]:
x = pd.read_csv(r"D:\DS_Final_Project\Notebook\data\Merged_data\x_churn_data.csv")
y = pd.read_csv(r"D:\DS_Final_Project\Notebook\data\Merged_data\y_churn_data.csv").values.ravel()

In [19]:
x.head()

Unnamed: 0,country_id,recency,spending_of_customer,total_no_of_order,cost,price,cost_missing,price_missing,method_name_international,method_name_priority,...,order_month_name_june,order_month_name_march,order_month_name_may,order_month_name_november,order_month_name_october,order_month_name_september,status_value_cancelled,status_value_delivered,status_value_delivery in progress,status_value_order received
0,-0.500925,1.704338,-0.388584,-1.298176,1.40368,0.593354,0,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,-0.500925,1.704338,-0.388584,-1.298176,1.40368,-1.365782,0,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,-0.500925,1.704338,-0.388584,-1.298176,1.40368,-1.605646,0,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,-0.500925,1.704338,-0.388584,-1.298176,1.40368,0.593354,0,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.500925,1.704338,-0.388584,-1.298176,1.40368,-1.365782,0,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
y

array([1, 1, 1, ..., 0, 0, 0])

In [21]:
# Spliting the data for train test

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

print("Training set shape:", x_train.shape, y_train.shape)
print("Testing set shape:", x_test.shape, y_test.shape)

Training set shape: (158163, 42) (158163,)
Testing set shape: (39541, 42) (39541,)


In [23]:
np.linspace(start=10,stop=50,num=5)

array([10., 20., 30., 40., 50.])

In [39]:
import optuna
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping



In [41]:

def objective(trial):
    
    neurons = trial.suggest_int('neurons',32,128,step=32)
    batch_size = trial.suggest_int('batch',32,128,step=16)
    learning_rate = trial.suggest_float('lr',1e-5,1e-1,log=True)
    activation_function = trial.suggest_categorical('activation', ['relu','elu', 'selu'])
    layers = trial.suggest_int('layer',5,10)
    

    
    model = Sequential()
        
    model.add(Input(shape=(x_train.shape[1],))) 
    
    for layer in range(layers):
        units = trial.suggest_int(f'layer_{layer}',32,512,step=32)
        model.add(Dense(units,activation=activation_function))
        
        if layer % 2 == 1:
            dropout_rate = trial.suggest_float(f'layer_{layer}_dropout', 0.1, 0.5, step=0.1)
            model.add(Dropout(dropout_rate))
        
    model.add(Dense(1,activation='sigmoid'))
    
    model.compile(optimizer = Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    early_stopping = EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)
    
    model.fit(x_train,y_train,epochs=50,
              batch_size=batch_size,
              validation_data=(x_test,y_test),
              callbacks=[early_stopping],
              verbose = 1)
    val_accuracy = model.evaluate(x_test,y_test,verbose=0)[1]
    return val_accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective,n_trials=3)

print(f"Best Hyperparameters: {study.best_params}")
print(f"Best Validation Accuracy: {study.best_value}")


[I 2025-01-04 17:49:15,429] A new study created in memory with name: no-name-60632416-5f78-43ae-8960-218542a873c9


Epoch 1/20
[1m2028/2028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9834 - loss: 0.0616 - val_accuracy: 0.9936 - val_loss: 0.0206
Epoch 2/20
[1m2028/2028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9951 - loss: 0.0165 - val_accuracy: 0.9945 - val_loss: 0.0164
Epoch 3/20
[1m2028/2028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9966 - loss: 0.0110 - val_accuracy: 0.9972 - val_loss: 0.0084
Epoch 4/20
[1m2028/2028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9973 - loss: 0.0088 - val_accuracy: 0.9978 - val_loss: 0.0065
Epoch 5/20
[1m2028/2028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9978 - loss: 0.0068 - val_accuracy: 0.9976 - val_loss: 0.0066
Epoch 6/20
[1m2028/2028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 6ms/step - accuracy: 0.9979 - loss: 0.0062 - val_accuracy: 0.9981 - val_loss: 0.0054
Epoch 7/20
[1

[I 2025-01-04 17:51:37,226] Trial 0 finished with value: 0.999418318271637 and parameters: {'neurons': 114, 'batch': 78, 'lr': 0.009246332607151872}. Best is trial 0 with value: 0.999418318271637.


Epoch 1/20
[1m3228/3228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.7411 - loss: 0.5734 - val_accuracy: 0.9558 - val_loss: 0.2935
Epoch 2/20
[1m3228/3228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9642 - loss: 0.2446 - val_accuracy: 0.9773 - val_loss: 0.1488
Epoch 3/20
[1m3228/3228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9795 - loss: 0.1314 - val_accuracy: 0.9830 - val_loss: 0.0985
Epoch 4/20
[1m3228/3228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9839 - loss: 0.0923 - val_accuracy: 0.9867 - val_loss: 0.0773
Epoch 5/20
[1m3228/3228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9879 - loss: 0.0728 - val_accuracy: 0.9884 - val_loss: 0.0669
Epoch 6/20
[1m3228/3228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9897 - loss: 0.0637 - val_accuracy: 0.9896 - val_loss: 0.0609
Epoch 7/20
[1m3

[I 2025-01-04 17:54:59,301] Trial 1 finished with value: 0.9940568208694458 and parameters: {'neurons': 80, 'batch': 49, 'lr': 3.317792355621882e-05}. Best is trial 0 with value: 0.999418318271637.


Epoch 1/20
[1m1720/1720[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.9362 - loss: 0.2072 - val_accuracy: 0.9906 - val_loss: 0.0520
Epoch 2/20
[1m1720/1720[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9920 - loss: 0.0479 - val_accuracy: 0.9933 - val_loss: 0.0364
Epoch 3/20
[1m1720/1720[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9946 - loss: 0.0310 - val_accuracy: 0.9953 - val_loss: 0.0254
Epoch 4/20
[1m1720/1720[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9957 - loss: 0.0226 - val_accuracy: 0.9959 - val_loss: 0.0200
Epoch 5/20
[1m1720/1720[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9963 - loss: 0.0172 - val_accuracy: 0.9965 - val_loss: 0.0166
Epoch 6/20
[1m1720/1720[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9967 - loss: 0.0146 - val_accuracy: 0.9969 - val_loss: 0.0138
Epoch 7/20
[1m

[I 2025-01-04 17:56:30,215] Trial 2 finished with value: 0.9989883899688721 and parameters: {'neurons': 96, 'batch': 92, 'lr': 0.0008773429421546105}. Best is trial 0 with value: 0.999418318271637.


Epoch 1/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9525 - loss: 0.1652 - val_accuracy: 0.9926 - val_loss: 0.0410
Epoch 2/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9934 - loss: 0.0367 - val_accuracy: 0.9950 - val_loss: 0.0267
Epoch 3/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9952 - loss: 0.0240 - val_accuracy: 0.9958 - val_loss: 0.0200
Epoch 4/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9959 - loss: 0.0177 - val_accuracy: 0.9958 - val_loss: 0.0164
Epoch 5/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9969 - loss: 0.0128 - val_accuracy: 0.9960 - val_loss: 0.0142
Epoch 6/20
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9968 - loss: 0.0114 - val_accuracy: 0.9964 - val_loss: 0.0114
Epoch 7/20
[1m1

[I 2025-01-04 17:57:25,235] Trial 3 finished with value: 0.9993677735328674 and parameters: {'neurons': 87, 'batch': 93, 'lr': 0.001255005933481923}. Best is trial 0 with value: 0.999418318271637.


Epoch 1/20
[1m1798/1798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9175 - loss: 0.2892 - val_accuracy: 0.9889 - val_loss: 0.0683
Epoch 2/20
[1m1798/1798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9901 - loss: 0.0635 - val_accuracy: 0.9912 - val_loss: 0.0536
Epoch 3/20
[1m1798/1798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9920 - loss: 0.0503 - val_accuracy: 0.9926 - val_loss: 0.0443
Epoch 4/20
[1m1798/1798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9929 - loss: 0.0418 - val_accuracy: 0.9940 - val_loss: 0.0358
Epoch 5/20
[1m1798/1798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9946 - loss: 0.0341 - val_accuracy: 0.9946 - val_loss: 0.0295
Epoch 6/20
[1m1798/1798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9951 - loss: 0.0268 - val_accuracy: 0.9952 - val_loss: 0.0244
Epoch 7/20
[1m1

[I 2025-01-04 17:58:26,790] Trial 4 finished with value: 0.9974962472915649 and parameters: {'neurons': 38, 'batch': 88, 'lr': 0.0005794132946274877}. Best is trial 0 with value: 0.999418318271637.


Best Hyperparameters: {'neurons': 114, 'batch': 78, 'lr': 0.009246332607151872}
Best Validation Accuracy: 0.999418318271637


In [42]:
best_params = study.best_params
best_params

{'neurons': 114, 'batch': 78, 'lr': 0.009246332607151872}

In [None]:
# Get the best hyperparameters from the Optuna study
best_params = study.best_params

# Train the final model with the best parameters
final_model = create_model(study.best_trial)

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the final model
final_model.fit(X_train_scaled, y_train, epochs=30, batch_size=128,
                validation_data=(X_test_scaled, y_test), callbacks=[early_stopping])

# Evaluate the final model
test_loss, test_accuracy = final_model.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


In [None]:
# Making predictions on the test set
predictions = final_model.predict(X_test_scaled)

# Convert predictions to binary (0 or 1)
predictions_binary = (predictions > 0.5).astype(int)

# Show some predictions
print("Predictions:", predictions_binary[:10])  # Show first 10 predictions
