# Business Problem Identification

### Churn is the number of customers a company loses over a specific time period. The "churn rate" is the percentage of a company's total customers who churn, and it often refers to service subscribers cancelling their memberships. Churn is a critical metric of company performance.
        - Predict customer churn
        - Identify patterns

# Exploratory Data Analysis

## Data preparation & Cleaning

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the churn dataset from the notebook's input directory into a DataFrame.
df = pd.read_csv('../input/churn-data/churn.csv')

In [3]:
df.head()

In [4]:
# Remove the row-number, customer-id and surname columns before analysis and modeling.
df = df.drop(columns=["RowNumber", "CustomerId", "Surname"])

In [5]:
df

# Data Visualization 

## Automatic Data Visualization Using pandas-profiling and DataPrep

In [6]:
from pandas_profiling import ProfileReport

# Build an exploratory profiling report of df and write it as an HTML file
# into the current working directory.
# NOTE(review): the pandas_profiling package has since been renamed
# ydata-profiling — confirm which one the target environment provides.
profile = ProfileReport(df, title="Data Visualize report", explorative=True)
profile.to_file("Visualize report new.html")

In [9]:
# Install DataPrep into the notebook environment (IPython shell escape), then
# generate its EDA report for df.
!pip install Dataprep
from dataprep.eda import create_report
create_report(df)

In [10]:
df.info()

# Feature Selection & Feature Engineering

In [11]:
df['Geography'].value_counts()

In [12]:
df.head()

In [13]:
# Encode the "Male" category as 1.
is_male = df["Gender"].eq("Male")
df.loc[is_male, "Gender"] = 1

In [14]:
# Encode the "Female" category as 0.
is_female = df["Gender"].eq("Female")
df.loc[is_female, "Gender"] = 0

In [15]:
df.head()

In [16]:
# Cast the 0/1-encoded Gender values to an integer dtype.
# NOTE(review): presumably the column is still object dtype after the in-place
# replacements in the earlier cells — confirm with df.info().
df['Gender'] = df['Gender'].astype(int)

In [17]:
df.info()

In [18]:
# One-hot encode every remaining object/categorical column.
# dtype=int pins the indicator columns to integers: pandas >= 2.0 defaults to
# bool, and a frame that mixes bool and float columns converts to an object
# array, which numeric consumers further down (e.g. the Keras model) reject.
df = pd.get_dummies(df, dtype=int)

In [19]:
df.head()

In [20]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale CreditScore; the result stays a 2-D array (one column) for the
# following cells to wrap and write back.
scaler = MinMaxScaler().fit(df[['CreditScore']])
sacled1 = scaler.transform(df[['CreditScore']])

In [21]:
# Wrap the scaled array in a DataFrame that reuses df's own index, so the later
# column assignment aligns row-for-row even when df does not carry a default
# 0..n-1 index (a fresh RangeIndex would silently produce NaN / shifted rows).
sacled1 = pd.DataFrame(sacled1, columns=['CreditScore'], index=df.index)

In [22]:
# Overwrite the raw CreditScore column with its scaled counterpart.
df['CreditScore'] = sacled1['CreditScore']

In [23]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale Balance; the result stays a 2-D array (one column) for the
# next cell to wrap and write back.
scaler = MinMaxScaler().fit(df[['Balance']])
sacled2 = scaler.transform(df[['Balance']])

In [24]:
# Wrap the scaled array on df's own index before writing it back: pandas aligns
# on index during column assignment, so a fresh 0..n-1 index would silently
# misplace values (or yield NaN) if df's index is not the default one.
sacled2 = pd.DataFrame(sacled2, columns=['Balance'], index=df.index)
df['Balance'] = sacled2

In [25]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale Tenure and write it back into df.
scaler = MinMaxScaler()
sacled3 = scaler.fit_transform(df[['Tenure']])
# Build the frame on df's own index: column assignment aligns on index, so a
# fresh 0..n-1 index would silently misplace values if df's index is not the
# default one.
sacled3 = pd.DataFrame(sacled3, columns=['Tenure'], index=df.index)
df['Tenure'] = sacled3


In [26]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale EstimatedSalary and write it back into df.
scaler = MinMaxScaler()
sacled4 = scaler.fit_transform(df[['EstimatedSalary']])
# Build the frame on df's own index: column assignment aligns on index, so a
# fresh 0..n-1 index would silently misplace values if df's index is not the
# default one.
sacled4 = pd.DataFrame(sacled4, columns=['EstimatedSalary'], index=df.index)
df['EstimatedSalary'] = sacled4


In [27]:
df.head()

In [28]:
# Separate the "Exited" target column from the feature matrix.
target_column = "Exited"
y = df[target_column]
x = df.drop(columns=target_column)
x.head()

In [29]:
from imblearn.over_sampling import SMOTE
smote = SMOTE()
x, y = smote.fit_resample(x,y)

# Modeling

In [30]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30)

In [31]:
# Random forest: 500 trees, each limited to depth 5, with sqrt(n_features)
# candidate features per split.
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(
    n_estimators=500,
    max_depth=5,
    max_features='sqrt',
)
rf.fit(x_train, y_train)

In [32]:
# AdaBoost classifier with the library's default hyperparameters, fitted on
# the training partition.
from sklearn.ensemble import AdaBoostClassifier
ada = AdaBoostClassifier()
ada.fit(x_train, y_train)

In [33]:
# Gradient boosting: 250 depth-2 trees, learning rate 0.1, no row subsampling.
from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(
    n_estimators=250,
    learning_rate=0.1,
    max_depth=2,
    subsample=1,
)
gb.fit(x_train, y_train)

In [34]:
from sklearn.model_selection import cross_val_score
def model_acc(model, features=None, labels=None):
    """Print the accuracy of an already-fitted model on held-out data.

    The model is scored directly with its own ``score`` method. Running
    ``cross_val_score`` on the test split would instead clone the estimator
    and refit it on folds of the test data, so the model that was trained on
    the training partition would never actually be evaluated.

    Args:
        model: Fitted estimator exposing ``score(X, y)``.
        features: Evaluation features; defaults to the notebook-level
            ``x_test`` when omitted.
        labels: Evaluation labels; defaults to the notebook-level ``y_test``
            when omitted.

    Returns:
        None. The result is printed as ``"<model repr>---><accuracy>"``.
    """
    if features is None:
        features = x_test
    if labels is None:
        labels = y_test
    acc = model.score(features, labels)
    print(str(model)+"--->"+str(acc))


In [35]:
# Report the accuracy of each fitted ensemble, in the order they were trained.
for fitted_model in (rf, ada, gb):
    model_acc(fitted_model)

# Hyperparameter Optimization - GridSearchCV

In [36]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for the grid search: number of trees x maximum tree depth.
param_dist = {
    'n_estimators': list(range(100, 501, 100)),
    'max_depth': list(range(1, 10)),
}

In [37]:
# The grid search below is wrapped in a string literal, so it is NOT executed;
# remove the surrounding triple quotes to actually run it.
'''grid_search= GridSearchCV(estimator=rf, param_grid=param_dist)
grid_search.fit(x_train,y_train)'''

# Deep Learning

In [38]:
from tensorflow import keras

In [39]:
# Fully connected binary classifier: six hidden ReLU layers of 20 units each,
# followed by a single sigmoid output unit.
# Only the first layer of a Sequential model needs an input shape (every later
# layer receives the 20 outputs of the layer before it), and that shape is read
# from the training data so it always matches the number of feature columns.
n_features = x_train.shape[1]
hidden_layers = [keras.layers.Dense(20, activation="relu") for _ in range(5)]
model = keras.Sequential([
    keras.layers.Dense(20, input_shape=(n_features,), activation="relu"),
    *hidden_layers,
    keras.layers.Dense(1, activation="sigmoid"),
])

model.compile(optimizer='Nadam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=100)

In [40]:
# Run the network on the held-out features; the sigmoid output layer yields one
# score between 0 and 1 per row.
prediction = model.predict(x_test)
# Show the first five scores.
prediction[:5]

In [41]:
y_test

In [42]:
# Threshold the network's scores at 0.5 to obtain hard 0/1 class labels.
y_pred = [1 if score > 0.5 else 0 for score in prediction]
    

In [43]:
from sklearn.metrics import confusion_matrix, classification_report

# Per-class precision / recall / F1 of the thresholded network predictions
# against the held-out labels.
report = classification_report(y_test, y_pred)
print(report)