<a href="https://colab.research.google.com/github/HLokeshwari/ArtificialNeuralNetwork/blob/main/SIXTH.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [10]:
pip install imbalanced-learn



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers, Input
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from scikeras.wrappers import KerasClassifier
from imblearn.over_sampling import SMOTE

# 1. Data Pre-processing
df = pd.read_csv('churn.csv')  # Replace 'churn.csv' with your actual file path

# Convert 'churn' to numerical (0 and 1).
# Comparing against the literal string 'FALSE' only works for that exact
# spelling: a column parsed as bool, or text such as 'False' / 'False.' / 'no',
# would turn every row into 1 and leave a single-class target. Normalise the
# text form first and map it explicitly instead.
CHURN_LABELS = {
    'true': 1, 'yes': 1, '1': 1, '1.0': 1,
    'false': 0, 'no': 0, '0': 0, '0.0': 0,
}
churn_text = df['churn'].astype(str).str.strip().str.rstrip('.').str.lower()
churn_encoded = churn_text.map(CHURN_LABELS)
if churn_encoded.isna().any():
    # Fail loudly rather than silently assigning unknown labels to a class.
    unknown_labels = sorted(churn_text[churn_encoded.isna()].unique())
    raise ValueError(f"Unrecognised values in 'churn' column: {unknown_labels}")
df['churn'] = churn_encoded.astype(int)

# Handle categorical features (excluding phone number, which is dropped below)
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    if col != 'phone number':
        # One encoder per column: each column gets its own integer codes.
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])

# Drop 'phone number' (it is not used as a feature)
df = df.drop('phone number', axis=1)

# Separate features (X) and target (y)
X = df.drop('churn', axis=1)
y = df['churn']


# ------------------------------------------------------------------------------
# Stratified train/test split with an explicit class check
# ------------------------------------------------------------------------------

# A stratified split cannot produce a class that is missing from y, and
# re-running the splitter with the same fixed random_state always yields the
# same partition, so retrying in a loop could never succeed. Check once and
# fail fast with a clear message instead.
if y.nunique() < 2:
    raise ValueError(
        "Target 'churn' contains a single class; check how the column was encoded."
    )

# 80/20 split that preserves the class proportions of y in both parts.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_index, test_index = next(splitter.split(X, y))
X_train_initial, X_test = X.iloc[train_index], X.iloc[test_index]
y_train_initial, y_test = y.iloc[train_index], y.iloc[test_index]

print("y_train_initial value counts:\n", y_train_initial.value_counts())  # Verify
print("y_test value counts:\n", y_test.value_counts())


# ------------------------------------------------------------------------------
# Scale numerical features, then address class imbalance using SMOTE
# ------------------------------------------------------------------------------

# Scale first: SMOTE builds synthetic rows from nearest neighbours, so on
# unscaled data the neighbour search is dominated by the largest-magnitude
# columns. Fitting the scaler on the real training rows only also keeps its
# statistics free of synthetic samples and of the test set.
numerical_cols = X_train_initial.select_dtypes(include=np.number).columns
scaler = StandardScaler()

X_train_scaled = X_train_initial.copy()
X_train_scaled[numerical_cols] = scaler.fit_transform(X_train_initial[numerical_cols])

# Work on a copy: X_test is a slice of X, and assigning into the slice in place
# triggers pandas' SettingWithCopyWarning.
X_test = X_test.copy()
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Oversample the minority class in the (scaled) training data only; the test
# set keeps its original class distribution.
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_scaled, y_train_initial)


# 2. Neural Network Architecture
def create_model(neurons1=64, neurons2=32, dropout1=0.2, dropout2=0.1, learning_rate=0.001,
                 input_dim=None):
    """Build and compile a two-hidden-layer binary classifier.

    Architecture: Input -> Dense(neurons1, relu) -> Dropout(dropout1)
    -> Dense(neurons2, relu) -> Dropout(dropout2) -> Dense(1, sigmoid).

    Args:
        neurons1: Units in the first hidden layer.
        neurons2: Units in the second hidden layer.
        dropout1: Dropout rate applied after the first hidden layer.
        dropout2: Dropout rate applied after the second hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        input_dim: Number of input features. When None (the default) it is
            taken from the module-level ``X_train`` at call time, which is
            what the original implementation always did.

    Returns:
        A Keras ``Sequential`` model compiled with Adam, binary cross-entropy
        loss and the accuracy metric.
    """
    if input_dim is None:
        # Fall back to the global training frame so existing callers
        # (including the KerasClassifier wrapper) keep working unchanged.
        input_dim = X_train.shape[1]

    model = keras.Sequential([
        Input(shape=(input_dim,)),
        layers.Dense(neurons1, activation='relu'),
        layers.Dropout(dropout1),
        layers.Dense(neurons2, activation='relu'),
        layers.Dropout(dropout2),
        layers.Dense(1, activation='sigmoid')  # single probability output
    ])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Seed Python, NumPy and the Keras backend so the reported metrics are
# repeatable from one run to the next.
keras.utils.set_random_seed(42)

model = create_model()

# Keras' validation_split takes the *last* fraction of the rows before any
# shuffling, and SMOTE appends its synthetic minority rows at the end of the
# resampled data. Hold out an explicit stratified 10% instead so the validation
# set contains both classes.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42, stratify=y_train
)
history = model.fit(
    X_fit, y_fit,
    epochs=50, batch_size=32,
    validation_data=(X_val, y_val),
    verbose=0,
)

# Evaluate on the untouched (non-oversampled) test set.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# The sigmoid output has shape (n, 1); threshold at 0.5 and flatten so the
# predictions are a 1-D label vector like y_test.
y_pred_proba = model.predict(X_test, verbose=0)
y_pred = (y_pred_proba > 0.5).astype(int).ravel()

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(f"Accuracy Score: {accuracy_score(y_test, y_pred)}")


# 3. Hyperparameter Tuning
# random_state makes each wrapped fit repeatable.
model_cv = KerasClassifier(model=create_model, verbose=0, random_state=42)

# Arguments of create_model must be routed with the 'model__' prefix; without
# it SciKeras rejects them as invalid estimator parameters. 'epochs' and
# 'batch_size' are parameters of the wrapper itself, so they stay unprefixed.
# NOTE: this grid has 648 combinations, i.e. 1944 fits with cv=3 -- expect a
# long run; shrink the lists for a quick pass.
param_grid = {
    'model__neurons1': [32, 64, 128],
    'model__neurons2': [16, 32, 64],
    'model__dropout1': [0.1, 0.2, 0.3],
    'model__dropout2': [0.1, 0.2],
    'model__learning_rate': [0.001, 0.01, 0.1],
    'epochs': [30, 50],
    'batch_size': [16, 32]
}

# NOTE: X_train was oversampled with SMOTE before cross-validation, so the CV
# folds contain synthetic rows and the CV accuracy is likely optimistic; the
# held-out test set below is the fairer check.
grid = GridSearchCV(estimator=model_cv, param_grid=param_grid, cv=3, verbose=1, scoring='accuracy')
grid_result = grid.fit(X_train, y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# '.model' is the build function handed to the wrapper; the fitted Keras
# network of the refitted best estimator lives in '.model_'.
best_model = grid_result.best_estimator_.model_
y_pred_tuned = best_model.predict(X_test, verbose=0)
# Threshold the sigmoid output and flatten (n, 1) -> (n,) to match y_test.
y_pred_tuned = (y_pred_tuned > 0.5).astype(int).ravel()

print(confusion_matrix(y_test, y_pred_tuned))
print(classification_report(y_test, y_pred_tuned))
print(f"Tuned Model Accuracy Score: {accuracy_score(y_test, y_pred_tuned)}")