In [32]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [33]:
#################################################Set the backend of matplotlib to the 'inline' backend
# IPython magic: figures are rendered as static images in the cell output.
%matplotlib inline

In [34]:
###############################################Display formatting for pandas output and default matplotlib figure size
# Show at most 10 rows when a frame is rendered, and print floats with
# thousands separators and two decimals.
pd.set_option('display.max_rows', 10)
pd.set_option('display.float_format', '{:,.2f}'.format)
# Default size (width, height in inches) for every matplotlib figure.
plt.rcParams.update({'figure.figsize': (16, 8)})

In [None]:
################################################Loading Dataset
# Absolute location of the churn CSV; adjust when running outside the
# environment this notebook was written in.
DATA_PATH = "/content/Churn_Modelling.csv"
df = pd.read_csv(DATA_PATH)
# Trailing expression so the notebook renders a (row-truncated) preview.
df

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

In [None]:
# Bar chart of how many rows share each Age value; value_counts() orders the
# bars from most to least frequent.
# NOTE(review): head(10000) keeps at most 10000 distinct values, which looks
# like a no-op for an age column - confirm and drop if so.
(
    df['Age']
    .value_counts()
    .head(10000)
    .plot.bar()
)

In [None]:
# Line plot of the number of rows per CreditScore value, ordered by score
# (sort_index) rather than by frequency.
(
    df['CreditScore']
    .value_counts()
    .sort_index()
    .plot.line()
)

In [None]:
# Area plot of the number of rows per Age value, ordered by age.
(
    df['Age']
    .value_counts()
    .sort_index()
    .plot.area()
)

In [None]:
# Histogram of the raw CreditScore values using pandas' default binning.
df['CreditScore'].plot.hist()

In [None]:
# Scatter of Age against CreditScore for rows with CreditScore below 100000.
# NOTE(review): the later outlier step treats scores <= 359 as extreme, so a
# 100000 cut-off presumably filters nothing - confirm whether it is needed.
(
    df.loc[df['CreditScore'] < 100000]
    .plot.scatter(x='CreditScore', y='Age')
)

In [None]:
# Hexagonal-bin density of Age against CreditScore (15 hexagons across the
# x-axis), using the same CreditScore < 100000 row filter as the scatter plot.
# NOTE(review): that filter presumably keeps every row - confirm.
(
    df.loc[df['CreditScore'] < 100000]
    .plot.hexbin(x='CreditScore', y='Age', gridsize=15)
)

In [None]:
# Stacked bar chart of the numeric columns for 10 randomly chosen rows.
# The sample is seeded (same seed as the train/test split) so the figure is
# identical on every Restart & Run All instead of changing per execution.
df.sample(10, random_state=42).plot.bar(stacked=True)

In [None]:
# Area plot of the frame's columns against the row index.
# NOTE(review): at this point the ID-like columns (RowNumber, CustomerId) are
# still in df and will presumably dominate the y-axis - confirm this is wanted.
df.plot.area()

In [None]:
plt.figure(figsize = (15,15))
# Correlation is only defined for numeric data. Since pandas 2.0, DataFrame.corr()
# no longer drops text columns silently and raises on them instead, so select the
# numeric columns explicitly (works on old and new pandas alike).
sns.heatmap(df.select_dtypes(include='number').corr(), annot = True)

In [None]:
# Summary statistics of the numeric columns. The call parentheses are required:
# without them the cell only shows the bound-method repr, not the statistics.
df.describe()

In [47]:
###################################Separate Numeric And Object Feature Columns
def num_obj_features(df):
    """Split the columns of ``df`` into numeric and object (text) feature names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column dtypes are inspected; it is not modified.

    Returns
    -------
    tuple[list, list]
        ``(num_cols, obj_cols)``: labels of columns with an integer or
        floating dtype of any width (including pandas nullable types), and
        labels of columns with an object or pandas string dtype. Both lists
        keep the original column order. Columns of any other dtype (bool,
        datetime, category, ...) appear in neither list.
    """
    num_cols = []
    obj_cols = []
    # Walk (label, dtype) pairs instead of looking dtypes up by label, so
    # duplicated column names cannot yield an ambiguous Series comparison.
    for col, dtype in df.dtypes.items():
        if pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_float_dtype(dtype):
            num_cols.append(col)
        elif pd.api.types.is_object_dtype(dtype) or isinstance(dtype, pd.StringDtype):
            # Text may be stored as plain object or as a dedicated string dtype.
            obj_cols.append(col)
    return num_cols, obj_cols

# Column-name lists for the numeric and the object-typed columns of df,
# computed from the frame as it is at this point in the notebook.
num_col, obj_col = num_obj_features(df)

In [None]:
# A cell only auto-renders its final expression, so the numeric preview was
# silently discarded. display() (injected into the namespace by IPython in
# notebook sessions) renders both previews.
display(df[num_col].head())
display(df[obj_col].head())

In [50]:
##################################################Drop Identifier Columns, Then Find And Remove Outliers
# Remove the three columns this notebook does not use as features.
# Rebinding instead of inplace=True, together with errors='ignore', makes the
# cell safe to re-run: a second execution is a no-op rather than a KeyError
# for the already-removed columns.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [51]:
def find_outlier(data, threshold=3):
    """Return the values of ``data`` whose absolute z-score exceeds ``threshold``.

    Parameters
    ----------
    data : sequence of numbers
        One-dimensional collection of values (e.g. a list built from a column).
    threshold : float, default 3
        A value is reported when ``abs((value - mean) / std) > threshold``,
        where ``std`` is the population standard deviation (ddof=0).

    Returns
    -------
    list
        The outlying elements of ``data``, unchanged and in their original
        order. The list is empty when ``data`` is empty, when every value is
        identical (zero spread), or when the spread is NaN (e.g. ``data``
        contains NaN), because no z-score can be formed in those cases.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return []

    mean = values.mean()
    std = values.std()
    # Zero or undefined spread would otherwise divide by zero / propagate NaN.
    if std == 0 or np.isnan(std):
        return []

    is_outlier = np.abs((values - mean) / std) > threshold
    # Return the caller's own elements (not float copies) to keep their types.
    return [value for value, flagged in zip(data, is_outlier) if flagged]
# Extract the three columns checked for outliers as plain Python lists.
Score_list, Balance_list, Salary_list = (
    df[column].tolist()
    for column in ('CreditScore', 'Balance', 'EstimatedSalary')
)

In [None]:
# CreditScore values flagged by find_outlier; the trailing expression
# displays them in the cell output.
Score_outlier = find_outlier(Score_list)
Score_outlier

In [None]:
# Balance values flagged by find_outlier; the trailing expression displays
# them in the cell output.
Balance_outlier = find_outlier(Balance_list)
Balance_outlier

In [None]:
# EstimatedSalary values flagged by find_outlier; the trailing expression
# displays them in the cell output.
Salary_outlier = find_outlier(Salary_list)
Salary_outlier

In [None]:
# Row/column count before the outlier rows are removed.
print(f"Shape of Data before removing outliers: {df.shape}")


In [56]:
# Upper bound (inclusive) of the CreditScore values treated as outliers.
# NOTE(review): 359 looks hand-copied from the Score_outlier output above -
# confirm it still matches if the data or the z-score threshold changes.
CREDIT_SCORE_OUTLIER_MAX = 359
# Filter with a boolean mask instead of dropping by index label in place:
# a label-based drop would also remove unrelated rows if index labels ever
# repeat. Rows with a missing CreditScore are kept, exactly as before.
df = df[~(df['CreditScore'] <= CREDIT_SCORE_OUTLIER_MAX)]

In [None]:
# Row/column count once the outlier rows have been removed.
print(f"Shape of Data after removing outliers: {df.shape}")

In [58]:
############################################Separate Features (X) And Target (y)
# Target vector is the 'Exited' column; every other column is a feature.
y = df['Exited']
X = df.drop(columns=['Exited'])

In [59]:
##########################################Creating Dummy Variables
# One-hot encode the two categorical columns, dropping the first level of each
# to avoid a redundant indicator.
dummy_vars = pd.get_dummies(X[['Geography', 'Gender']], drop_first=True)
# Swap the raw categorical columns for their indicator columns, which are
# appended after the remaining features.
X = pd.concat([X.drop(columns=['Geography', 'Gender']), dummy_vars], axis=1)

In [60]:
###########################################Splitting Dataset Into Train Test
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
# NOTE(review): the split is not stratified on y - confirm that is intended
# if the two 'Exited' classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [61]:
############################################Feature Scaling
from sklearn.preprocessing import StandardScaler
# Learn the per-column mean and standard deviation from the training rows only,
# so no statistics from the test set leak into the scaler.
sc = StandardScaler().fit(X_train)
# Apply the same fitted scaling to both partitions.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)