In [None]:
# Import all required libraries
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import lightgbm as lgb
from keras.models import Sequential
from keras.layers import Dense
from imblearn.combine import SMOTEENN

# Load the raw customer-churn workbook.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact file exists. Point DATA_PATH at a
# project-relative location before sharing the notebook.
DATA_PATH = "C:/Users/SHIVA KARTHIK P/OneDrive/Desktop/customer_churn_large_dataset.xlsx"
data = pd.read_excel(DATA_PATH)

# Impute missing values in the numeric columns with each column's mean.
# numeric_only=True is required: the frame is expected to contain
# non-numeric columns (e.g. the categorical 'Gender' and 'Location'), and
# pandas >= 2.0 raises TypeError when asked for the mean of such a column.
# Non-numeric columns are left untouched, as they were on older pandas.
# Reassigning instead of using inplace=True keeps the step re-runnable.
data = data.fillna(data.mean(numeric_only=True))

# Column groups used as model inputs. 'Name' and 'CustomerID' are left out on
# the assumption that they are not relevant to the prediction.
numerical_cols = ['Age', 'Subscription_Length_Months', 'Monthly_Bill', 'Total_Usage_GB']
categorical_cols = ['Gender', 'Location']

# Target column first, then the two feature sub-frames.
y = data['Churn']
X_numerical = data[numerical_cols]
X_categorical = data[categorical_cols]

# One-hot encode the categorical columns, then wrap the dense matrix in a
# DataFrame whose columns carry the encoder's generated feature names.
encoder = OneHotEncoder()
X_categorical_encoded = encoder.fit_transform(X_categorical)
encoded_feature_names = encoder.get_feature_names_out(categorical_cols)
X_categorical_encoded = pd.DataFrame(
    X_categorical_encoded.toarray(),
    columns=encoded_feature_names,
)

# Final design matrix: numeric columns followed by the one-hot columns.
X_final = pd.concat([X_numerical, X_categorical_encoded], axis="columns")

# Hold out a test set BEFORE any resampling so that it contains only real
# customers. stratify=y keeps the churn ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training partition only, then apply that same
# transform to the test partition so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Rebalance the classes with SMOTEENN on the *training* data only.
# Previously the resampler ran on the full, unscaled X_final and its output
# was split a second time, which meant that (a) the scaled arrays were never
# used by any model and (b) synthetic samples ended up in the evaluation
# set, inflating the reported accuracy. The fixed seed makes the resampling
# repeatable across runs.
sm = SMOTEENN(random_state=42)
res_x, res_y = sm.fit_resample(X_train, y_train)

# Names consumed by the model-training code: the resampled, scaled training
# data and the scaled but NOT resampled hold-out set.
rx_train, ry_train = res_x, res_y
rx_test, ry_test = X_test, y_test

# LightGBM: wrap the training and evaluation partitions in Dataset objects.
train_data = lgb.Dataset(rx_train, label=ry_train)
test_data = lgb.Dataset(rx_test, label=ry_test)

# Booster hyper-parameters for a binary objective tracked with log-loss.
params = dict(
    objective='binary',
    metric='binary_logloss',
    boosting_type='gbdt',
    num_leaves=128,
    learning_rate=0.05,
    feature_fraction=0.8,
    bagging_fraction=0.8,
    bagging_freq=5,
    lambda_l1=0.1,
    lambda_l2=0.1,
)

# Train for a fixed number of boosting rounds, reporting the metric on
# test_data as the validation set.
num_round = 1500
bst = lgb.train(
    params,
    train_data,
    num_boost_round=num_round,
    valid_sets=[test_data],
)

# LightGBM predictions for the evaluation partition.
y_pred_lightgbm = bst.predict(rx_test)

# Neural network: three ReLU hidden layers (256 -> 64 -> 32) feeding a
# single sigmoid output unit. The input width is taken from the training data.
n_features = rx_train.shape[1]
model = Sequential([
    Dense(256, input_dim=n_features, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit for 150 epochs in mini-batches of 64, validating on the evaluation
# partition after every epoch.
model.fit(
    rx_train,
    ry_train,
    validation_data=(rx_test, ry_test),
    epochs=150,
    batch_size=64,
    verbose=1,
)

# Network predictions for the evaluation partition.
y_pred_nn = model.predict(rx_test)

# Ensemble: average the LightGBM predictions with the flattened network
# predictions, element by element.
nn_scores = y_pred_nn.flatten()
combined_predictions = (y_pred_lightgbm + nn_scores) / 2

# Threshold the averaged value at 0.5 to obtain hard 0/1 labels
# (kept as a plain Python list of ints).
y_pred_combined_binary = np.where(combined_predictions >= 0.5, 1, 0).tolist()

# Score the ensemble against the evaluation labels and report the accuracy.
combined_accuracy = accuracy_score(ry_test, y_pred_combined_binary)
print("Combined Model Accuracy:", combined_accuracy)

# Feature importance reported by the trained LightGBM booster (using the
# booster's default importance type), paired with the design-matrix column
# names and ordered from most to least important.
importance = bst.feature_importance()
feature_names = list(X_final.columns)
feature_importance = (
    pd.DataFrame({'feature': feature_names, 'importance': importance})
    .sort_values(by='importance', ascending=False)
)
print("Feature Importance from LightGBM:", feature_importance)



In [24]:
# pickle is used to serialise the trained model to disk.
import pickle

# Path of the file the model is written to and later read back from.
filename = "model.churn"


In [25]:
# Save the trained neural network (`model`) to `filename`.
# The context manager guarantees the file is flushed and closed even if
# pickling fails; a bare open() passed straight to pickle.dump leaves the
# handle unclosed.
# NOTE(review): only the Keras network is persisted here. The LightGBM
# booster, the scaler and the encoder are not saved, so the averaged
# ensemble cannot be rebuilt from this file alone.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Verify the saved model by reading it back from disk.
# The context manager closes the file handle as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load files that this notebook itself produced.
with open(filename, 'rb') as model_file:
    load_model = pickle.load(model_file)