In [None]:

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Read the 'datasheet' and 'codedsheet' sheets of the same workbook into two frames.
data = pd.read_excel(
    'customer_retention_dataset.xlsx',
    sheet_name='datasheet',
)
encoded_data = pd.read_excel(
    'customer_retention_dataset.xlsx',
    sheet_name='codedsheet',
)

In [None]:
# Preview the first rows of `data` (head() shows five rows by default).
data.head()

In [None]:
# Display the current column labels of `data`.
data.columns

In [None]:
# Short identifiers that replace the column labels of both frames.
# The same list is assigned to `data` and `encoded_data`, so both must have
# exactly as many columns as there are names here (pandas raises otherwise).
# NOTE(review): some names are misspelled ('userFrienlyInterface',
# 'conveninetPaymentMode') but later cells reference them verbatim, so they
# must only be corrected together with every use.
columns = ['gender','age','city','pinCode','shoppingSince','shoppingFrequency','internetAccessibility','deviceUsed',
           'screenSize','OS', 'browserUsed','channelFirstUsed','loginMode','timeSpentDeciding','paymentMode','cancellingFrequency',
          'cancellationReason','contentReadability','similarProductInfo','sellerProductInfo','productInfoClarity','navigationEase',
          'loadingProcessingSpeed','userFrienlyInterface','conveninetPaymentMode','timelyFulfilmentTrust','custSupportResponse',
          'custPrivacyGuarantee','variousChannelResponses','benefit','enjoy','convenience','returnReplacementPolicy','loyaltyProgramsAccess',
          'infoSatisfaction','siteQualitySatisfaction','netBenefitSatisfaction','trust','productSeveralCategory','relevantProductInfo','monetarySavings',
          'patronizingConvenience','adventureSense','socialStatus','gratification','roleFulfilment','moneyWorthy','shoppedFrom','easyWebApp',
           'visuallyAppealingWebApp','productVariety','completeProductInfo','fastWebApp','reliableWebApp','quickPurchase','paymentOptionsAvailability',
           'fastDelivery','custInfoPrivacy','finInfoSecurity','perceivedTrustworthiness','multiChannelAssistance','longLoginTime','longDisplayTime',
           'latePriceDeclare','longLoadingTime','limitedPaymentMode','lateDelivery','webAppDesignChange','pageDisruption','webAppEfficiency',
           'recommendation']

# Overwrite the labels in place on both frames, then preview the result.
data.columns = columns
encoded_data.columns = columns
data.head()

In [None]:
# Print the per-column dtype and non-null count summary of `data`.
data.info()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Heatmap of the boolean missing-value mask (one cell per row/column pair).
sns.heatmap(data.isna())

In [None]:
# Summary statistics restricted to the object-typed columns.
data.describe(include='object')

In [None]:
# Summary statistics using describe()'s default column selection.
data.describe()

In [None]:
# Print the frequency table of every column, followed by a separator line.
# Iterating a DataFrame yields its column labels.
for i in data:
    print(data[i].value_counts())
    print('****************************************')

In [None]:
# Count plot of the 'gender' column plus its percentage breakdown.
# x is passed by keyword: recent seaborn releases accept only `data`
# positionally, so a bare Series would not be treated as the x variable.
sns.countplot(x='gender', data=data)
plt.title('Gender')
# Percent of all rows per gender, rounded to 2 decimals. The original put the
# ", 2" outside round() (rounding to integers and printing a stray 2) and
# divided by a hard-coded 269, presumably the row count; len(data) tracks it.
print((data['gender'].value_counts() / len(data) * 100).round(2))

In [None]:
!pip install plotly==5.8.2

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
import plotly.express as px

In [None]:
# Rows per city, shaped into a two-column frame ('city', 'count').
city_count = (
    data['city']
    .value_counts()
    .rename_axis('city')
    .reset_index(name='count')
)

# One bar per city, coloured by city.
fig = px.bar(
    city_count,
    x='city',
    y='count',
    color='city',
    title='Online Shopping is preferred in which cities?',
)
fig.show(height=200,width=200)

In [None]:
# Count plot of the 'age' column plus its percentage breakdown.
# x is passed by keyword: recent seaborn releases accept only `data`
# positionally, so a bare Series would not be treated as the x variable.
sns.countplot(x='age', data=data)
plt.title('Age')
# Percent of all rows per age value, rounded to 2 decimals. The original put
# the ", 2" outside round() (rounding to integers and printing a stray 2) and
# divided by a hard-coded 269, presumably the row count; len(data) tracks it.
print((data['age'].value_counts() / len(data) * 100).round(2))

In [None]:
import seaborn as sns
# Row counts per 'gender' value, split by 'age'.
# catplot is the replacement for factorplot, which current seaborn releases no
# longer provide; x is passed by keyword because plot variables are no longer
# accepted positionally.
sns.catplot(x='gender', kind='count', data=data, hue='age', palette="hls")
plt.xticks(rotation=0);

In [None]:
# Keep only the three demographic columns used by the city histograms.
dataset = data.loc[:, ['age', 'city', 'gender']]

# Rows per city, stacked by gender.
fig = px.histogram(
    dataset,
    x='city',
    color='gender',
    title='Online shopping in cities based on Gender:',
)
fig.show()

In [None]:
# Rows per city, stacked by age (uses the `dataset` subset built earlier).
fig = px.histogram(
    dataset,
    x='city',
    color='age',
    title='Online shopping in cities based on various age groups : ',
)
fig.show()

In [None]:
# Row counts per 'shoppingFrequency' value, split by 'shoppingSince'.
# catplot is the replacement for factorplot, which current seaborn releases no
# longer provide; x is passed by keyword because plot variables are no longer
# accepted positionally.
sns.catplot(x='shoppingFrequency', kind='count', data=data, hue='shoppingSince', palette="husl")
plt.xticks(rotation=90);

In [None]:
# Row counts per 'shoppingFrequency' value, split by 'internetAccessibility'.
# catplot is the replacement for factorplot, which current seaborn releases no
# longer provide; x is passed by keyword because plot variables are no longer
# accepted positionally.
sns.catplot(x='shoppingFrequency', kind='count', data=data, hue='internetAccessibility', palette="Set2")
plt.xticks(rotation=90);

In [None]:
# Row counts per 'timeSpentDeciding' value, split by 'paymentMode'.
# catplot is the replacement for factorplot, which current seaborn releases no
# longer provide; x is passed by keyword because plot variables are no longer
# accepted positionally.
sns.catplot(x='timeSpentDeciding', kind='count', data=data, hue='paymentMode', palette="flare")
plt.xticks(rotation=90);

In [None]:
# Rows per cancellation reason, shaped into a ('Reason', 'Count') frame.
reasons = (
    data['cancellationReason']
    .value_counts()
    .rename_axis('Reason')
    .reset_index(name='Count')
)

# Funnel-area chart: one segment per reason, sized by its count.
fig = go.Figure(
    go.Funnelarea(
        text=reasons['Reason'],
        values=reasons['Count'],
        marker=dict(
            colors=["deepskyblue", "lightsalmon", "tan", "teal", "silver"],
            line=dict(
                color=["wheat", "wheat", "wheat", "wheat", "wheat"],
                width=[0, 1, 1, 0, 1],
            ),
        ),
    )
)

fig.update_layout(title='Reasons for not purchasing any product :', title_x=0.5)
fig.show()

In [None]:
# Histogram of 'cancellingFrequency', stacked by 'cancellationReason', with a
# fixed colour per reason. update_layout returns the figure, so it is chained.
fig = px.histogram(
    data,
    x='cancellingFrequency',
    color='cancellationReason',
    color_discrete_map={
        'Lack of trust': '#FF5722',
        'Promo code not applicable': '#4DD0E1',
        'Better alternative offer': '#CDDC39',
        'Change in price': '#FFEE58',
        'No preferred mode of payment': '#004D40',
    },
).update_layout(
    title='How often do people abandon their carts? Reasons:',
    title_x=0.4,
)

fig.show()

In [None]:
# Histogram of 'cancellationReason', stacked by 'paymentMode'.
# update_layout returns the figure, so it is chained onto the constructor.
fig = px.histogram(
    data,
    x='cancellationReason',
    color='paymentMode',
).update_layout(
    title='Payment Mode vs Cancellation Reasons : ',
    title_x=0.5,
)

fig.show()

In [None]:
# Row counts per 'contentReadability' value, split by 'similarProductInfo'.
# catplot is the replacement for factorplot, which current seaborn releases no
# longer provide; x is passed by keyword because plot variables are no longer
# accepted positionally.
sns.catplot(x='contentReadability', kind='count', data=data, hue='similarProductInfo', palette="crest")
plt.xticks(rotation=90);

In [None]:
# Row counts per 'userFrienlyInterface' value, split by 'conveninetPaymentMode'
# (both spellings match the column names assigned to `data`).
# catplot is the replacement for factorplot, which current seaborn releases no
# longer provide; x is passed by keyword because plot variables are no longer
# accepted positionally.
sns.catplot(x='userFrienlyInterface', kind='count', data=data, hue='conveninetPaymentMode', palette="rocket_r")
plt.xticks(rotation=90);

In [None]:
# Row counts per 'longLoginTime' value, split by 'longDisplayTime'.
# x is passed by keyword: recent seaborn releases accept only `data`
# positionally, so a positional column name collides with data=data.
sns.countplot(x='longLoginTime', data=data, hue='longDisplayTime', palette="magma")
plt.xticks(rotation=90);

In [None]:
# Row counts per 'limitedPaymentMode' value, split by 'lateDelivery'.
# x is passed by keyword: recent seaborn releases accept only `data`
# positionally, so a positional column name collides with data=data.
sns.countplot(x='limitedPaymentMode', data=data, hue='lateDelivery', palette="rocket_r")
plt.xticks(rotation=90);

In [None]:
# Row counts per 'webAppDesignChange' value, split by 'pageDisruption'.
# x is passed by keyword: recent seaborn releases accept only `data`
# positionally, so a positional column name collides with data=data.
sns.countplot(x='webAppDesignChange', data=data, hue='pageDisruption', palette="viridis")
plt.xticks(rotation=90);

In [None]:
# Row counts per 'webAppEfficiency' value, split by 'recommendation'.
# catplot is the replacement for factorplot, which current seaborn releases no
# longer provide; x is passed by keyword because plot variables are no longer
# accepted positionally.
sns.catplot(x='webAppEfficiency', kind='count', data=data, hue='recommendation', palette="rocket_r")

plt.xticks(rotation=90);

In [None]:
# Row counts per 'paymentOptionsAvailability' value, split by 'fastDelivery'.
# x is passed by keyword: recent seaborn releases accept only `data`
# positionally, so a positional column name collides with data=data.
sns.countplot(x='paymentOptionsAvailability', data=data, hue='fastDelivery', palette="flare")
plt.xticks(rotation=90);

In [None]:
# Partition the column names by dtype: object-typed columns vs. all others.
categorical = [name for name, dtype in data.dtypes.items() if dtype == object]
continous = [name for name in data.columns if name not in categorical]

def value_count(column):
    """Draw a count plot for one column of `data` and print its percentage table.

    Parameters
    ----------
    column : str
        Label of a column in the module-level `data` frame.

    Notes
    -----
    The figure size depends on the number of distinct values in the column,
    and x tick labels are rotated by 90 degrees once there are five or more.
    Nothing is returned; the plot is shown and the percentages are printed.
    """
    diff_count = data[column].nunique()
    if diff_count < 5:
        figsize, rotate_labels = (10, 5), False
    elif diff_count < 10:
        figsize, rotate_labels = (10, 6), True
    elif diff_count < 20:
        figsize, rotate_labels = (25, 6), True
    else:
        figsize, rotate_labels = (20, 6), True

    plt.figure(figsize=figsize)
    sns.countplot(x=column, data=data)
    # Rotate only after the bars exist, so the rotation applies to the tick
    # labels actually drawn rather than to placeholders on an empty figure.
    if rotate_labels:
        plt.xticks(rotation=90)
    plt.show()

    # Percent of all rows per value, rounded to 2 decimals. Uses the `column`
    # argument (the original read the global loop variable `i`), keeps the
    # precision inside the rounding call, and replaces the hard-coded 269
    # (presumably the row count) with len(data).
    print((data[column].value_counts() / len(data) * 100).round(2))

In [None]:
# Call value_count() once for every name in `categorical`.
# NOTE(review): keep the loop variable named `i` unless value_count() is
# confirmed not to read a module-level `i`.
for i in categorical:
    value_count(i)

In [None]:
# Correlation heatmap over the first 47 columns of `encoded_data`.
# .copy() makes `df` an independent frame, so the 'city' assignment below does
# not write into a slice of `encoded_data`.
df = encoded_data.iloc[:, :47].copy()
le = LabelEncoder()
# Replace the city labels with integer codes so the column can be correlated.
df['city'] = le.fit_transform(df['city'])

# Compute the matrix once, restricted to numeric/bool columns so corr() does
# not depend on how the installed pandas treats leftover non-numeric columns.
corr = df.select_dtypes(include=['number', 'bool']).corr()
# Boolean mask hiding the upper triangle, diagonal included. The original
# built the mask from the correlation values themselves.
upper_triangle = np.triu(np.ones_like(corr, dtype=bool))
plt.figure(figsize=(35, 10))

# Only correlations >= 0.5 or <= -0.4 are kept; everything else becomes NaN
# and is left blank in the heatmap.
sns.heatmap(corr[(corr >= 0.5) | (corr <= -0.4)],
            cmap='viridis', vmax=1.0, vmin=-1.0, linewidths=0.1,
            annot=True, annot_kws={"size": 8}, square=True, mask=upper_triangle)
plt.title("Correlation between variables: ");