In [51]:
'''
    Challenge link
    https://preppindata.blogspot.com/2023/02/2023-week-6-dsb-customer-ratings.html
'''

import pandas as pd

In [52]:
# Load the raw customer survey responses and preview the first five rows.
df = pd.read_csv('DSB Customer Survery.csv')
df.head()

Unnamed: 0,Customer ID,Mobile App - Ease of Use,Mobile App - Ease of Access,Mobile App - Navigation,Mobile App - Likelihood to Recommend,Mobile App - Overall Rating,Online Interface - Ease of Use,Online Interface - Ease of Access,Online Interface - Navigation,Online Interface - Likelihood to Recommend,Online Interface - Overall Rating
0,535084,2,1,5,4,1,4,4,5,2,3
1,250892,3,5,4,4,2,5,5,2,4,3
2,544191,5,3,4,4,1,3,3,2,3,1
3,949343,2,5,4,3,1,1,4,3,5,1
4,915305,3,1,2,1,1,4,2,4,3,2


In [53]:
# Print the column names, non-null counts and dtypes of the loaded survey data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 11 columns):
 #   Column                                      Non-Null Count  Dtype
---  ------                                      --------------  -----
 0   Customer ID                                 768 non-null    int64
 1   Mobile App - Ease of Use                    768 non-null    int64
 2   Mobile App - Ease of Access                 768 non-null    int64
 3   Mobile App - Navigation                     768 non-null    int64
 4   Mobile App - Likelihood to Recommend        768 non-null    int64
 5   Mobile App - Overall Rating                 768 non-null    int64
 6   Online Interface - Ease of Use              768 non-null    int64
 7   Online Interface - Ease of Access           768 non-null    int64
 8   Online Interface - Navigation               768 non-null    int64
 9   Online Interface - Likelihood to Recommend  768 non-null    int64
 10  Online Interface - Overall Rating     

In [54]:
# Reshape the survey from wide to long:
# 1) One row per customer per question, with the responses for the Mobile App and
#    Online Interface in separate fields on the same row.
# 2) Clean the question categories so they don't have the platform in front of them.
# 3) Exclude the Overall Ratings (these were incorrectly calculated by the system),
#    which leaves 4 rows per customer.

questions = ['Ease of Use', 'Ease of Access', 'Navigation', 'Likelihood to Recommend']

# Build every column in a single vectorised pass rather than appending one row at a
# time with `.loc[len(cust_df)]`, which grows the frame on each insert.
# Row order matches the row-by-row version: customers in their original order and,
# within each customer, questions in the order listed above.
cust_df = pd.DataFrame({
    # Each customer ID is repeated once per question.
    'Customer ID': df['Customer ID'].repeat(len(questions)).to_numpy(),
    # The question list is cycled once per customer.
    'Survey': questions * len(df),
    # Row-major flattening yields the first customer's answers, then the second's, ...
    'Mobile App': df[[f'Mobile App - {q}' for q in questions]].to_numpy().ravel(),
    'Online Interface': df[[f'Online Interface - {q}' for q in questions]].to_numpy().ravel(),
})

In [55]:
# Preview the first five rows of the reshaped (one row per customer per question) data.
cust_df.head()

Unnamed: 0,Customer ID,Survey,Mobile App,Online Interface
0,535084,Ease of Use,2,4
1,535084,Ease of Access,1,4
2,535084,Navigation,5,5
3,535084,Likelihood to Recommend,4,2
4,250892,Ease of Use,3,5


In [56]:
# Average rating per customer, computed separately for each platform.

# Group once and reuse the grouping for both platform columns.
ratings_by_customer = cust_df.groupby('Customer ID')

# Both means share the same 'Customer ID' index, so they line up column-wise;
# reset_index() then turns that index back into a regular column.
avg_df = pd.DataFrame({
    'Average Mobile': ratings_by_customer['Mobile App'].mean(),
    'Average Online Interface': ratings_by_customer['Online Interface'].mean(),
}).reset_index()
avg_df.head(8)

Unnamed: 0,Customer ID,Average Mobile,Average Online Interface
0,101646,3.5,3.25
1,101650,2.25,3.0
2,105088,3.5,4.25
3,109306,2.0,2.0
4,110719,3.0,3.5
5,112474,2.0,2.5
6,112862,3.25,3.5
7,115507,2.0,3.5


In [57]:
# Difference in average rating between the Mobile App and the Online Interface for
# each customer (Mobile App minus Online Interface, so positive favours the Mobile App).
# Plain Series subtraction is vectorised; no Python-level zip/loop is needed.

avg_df['Difference'] = avg_df['Average Mobile'] - avg_df['Average Online Interface']
avg_df.head(5)

Unnamed: 0,Customer ID,Average Mobile,Average Online Interface,Difference
0,101646,3.5,3.25,0.25
1,101650,2.25,3.0,-0.75
2,105088,3.5,4.25,-0.75
3,109306,2.0,2.0,0.0
4,110719,3.0,3.5,-0.5


In [58]:
# Categorise customers by Difference (Average Mobile - Average Online Interface):
#   Mobile App Superfan        difference >= 2
#   Mobile App Fan             1 <= difference < 2
#   Neutral                    -1 < difference < 1
#   Online Interface Fan       -2 < difference <= -1
#   Online Interface Superfan  difference <= -2

def categorise_fan(difference):
    """Return the fan category label for one customer's rating difference.

    Parameters
    ----------
    difference : float
        Average Mobile App rating minus average Online Interface rating.

    Returns
    -------
    str or None
        One of the five category labels, or None when ``difference`` is NaN
        (every comparison is False for NaN, so no category applies).
    """
    # Checks run from the highest band downwards, so each test only needs the
    # lower bound of its band.
    if difference >= 2:
        return 'Mobile App Superfan'
    if difference >= 1:
        return 'Mobile App Fan'
    if difference > -1:
        return 'Neutral'
    if difference > -2:
        return 'Online Interface Fan'
    if difference <= -2:
        return 'Online Interface Superfan'
    return None


# Apply the rule to the whole column at once instead of writing one cell per
# iteration with `.loc`; this also guarantees the 'Fan' column always exists.
avg_df['Fan'] = avg_df['Difference'].map(categorise_fan)

avg_df.head(5)


Unnamed: 0,Customer ID,Average Mobile,Average Online Interface,Difference,Fan
0,101646,3.5,3.25,0.25,Neutral
1,101650,2.25,3.0,-0.75,Neutral
2,105088,3.5,4.25,-0.75,Neutral
3,109306,2.0,2.0,0.0,Neutral
4,110719,3.0,3.5,-0.5,Neutral


In [59]:
# Print column dtypes and non-null counts of avg_df after the 'Fan' column was added.
avg_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 5 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Customer ID               768 non-null    int64  
 1   Average Mobile            768 non-null    float64
 2   Average Online Interface  768 non-null    float64
 3   Difference                768 non-null    float64
 4   Fan                       768 non-null    object 
dtypes: float64(3), int64(1), object(1)
memory usage: 30.1+ KB


In [60]:
# Percent of total customers in each fan category, rounded to 1 decimal place.

total = len(avg_df)

# Count customers per category; after reset_index() the 'Customer ID' column
# holds that count rather than an ID.
percent_df = (
    avg_df.groupby('Fan')['Customer ID']
    .count()
    .reset_index()
)

# Convert each count to its share of all customers, as a percentage.
percent_df['% of Total'] = [
    round((count / total) * 100, 1)
    for count in percent_df['Customer ID']
]
percent_df.head()

Unnamed: 0,Fan,Customer ID,% of Total
0,Mobile App Fan,126,16.4
1,Mobile App Superfan,20,2.6
2,Neutral,489,63.7
3,Online Interface Fan,113,14.7
4,Online Interface Superfan,20,2.6


In [61]:
# Write the per-category summary to output.csv, omitting the DataFrame index.
percent_df.to_csv('output.csv', index=False)