In [73]:
import pandas as pd

In [74]:
"""Requirements
- Input the data
- Reshape the data so we have 5 rows for each customer, with responses for the Mobile App and Online Interface being in separate fields on the same row
- Clean the question categories so they don't have the platform in from of them
    e.g. Mobile App - Ease of Use should be simply Ease of Use
- Exclude the Overall Ratings, these were incorrectly calculated by the system
- Calculate the Average Ratings for each platform for each customer 
- Calculate the difference in Average Rating between Mobile App and Online Interface for each customer
- Catergorise customers as being:
        Mobile App Superfans if the difference is greater than or equal to 2 in the Mobile App's favour
        Mobile App Fans if difference >= 1
        Online Interface Fan
        Online Interface Superfan
        Neutral if difference is between 0 and 1
- Calculate the Percent of Total customers in each category, rounded to 1 decimal place

Output the data
"""

"Requirements\n- Input the data\n- Reshape the data so we have 5 rows for each customer, with responses for the Mobile App and Online Interface being in separate fields on the same row\n- Clean the question categories so they don't have the platform in from of them\n    e.g. Mobile App - Ease of Use should be simply Ease of Use\n- Exclude the Overall Ratings, these were incorrectly calculated by the system\n- Calculate the Average Ratings for each platform for each customer \n- Calculate the difference in Average Rating between Mobile App and Online Interface for each customer\n- Catergorise customers as being:\n        Mobile App Superfans if the difference is greater than or equal to 2 in the Mobile App's favour\n        Mobile App Fans if difference >= 1\n        Online Interface Fan\n        Online Interface Superfan\n        Neutral if difference is between 0 and 1\n- Calculate the Percent of Total customers in each category, rounded to 1 decimal place\n\nOutput the data\n"

In [75]:
df = pd.read_csv('DSB Customer Survery.csv')

In [76]:
#Reshape the data
df = pd.melt(df, id_vars='Customer ID', var_name='Questions', value_name='Response' )
df

Unnamed: 0,Customer ID,Questions,Response
0,535084,Mobile App - Ease of Use,2
1,250892,Mobile App - Ease of Use,3
2,544191,Mobile App - Ease of Use,5
3,949343,Mobile App - Ease of Use,2
4,915305,Mobile App - Ease of Use,3
...,...,...,...
7675,374015,Online Interface - Overall Rating,1
7676,144922,Online Interface - Overall Rating,3
7677,421323,Online Interface - Overall Rating,2
7678,707580,Online Interface - Overall Rating,1


In [77]:
#Clean the question categories
df[['Platform', "Questions"]] = df['Questions'].str.split('-', n=1, expand= True)

In [78]:
#Exclude the Overall Ratings
df =df[df['Questions'] != ' Overall Rating']

In [79]:
#calculate the Average Ratings for each platform for each customer and reshape the data
df2 = df.groupby(['Customer ID', 'Platform'])['Response'].mean().reset_index()
df2 = pd.pivot(df2, index = 'Customer ID', columns = 'Platform', values = 'Response').reset_index()
df2

Platform,Customer ID,Mobile App,Online Interface
0,101646,3.50,3.25
1,101650,2.25,3.00
2,105088,3.50,4.25
3,109306,2.00,2.00
4,110719,3.00,3.50
...,...,...,...
763,994742,3.00,3.50
764,996508,2.50,3.00
765,997785,3.75,3.00
766,997926,3.50,3.75


In [80]:
#Calculate difference in average response
df2['Difference'] = df2['Mobile App '] - df2['Online Interface ']

In [81]:
#Categorize by preference
def func(df):
    if df['Difference'] >=2: 
        return 'Mobile App SuperFan'
    elif df['Difference'] >= 1 :
        return 'Mobile App Fan'
    elif df['Difference'] > -1 and df['Difference'] < 1: 
        return 'Neutral'
    elif df['Difference'] >= -2 and df['Difference'] < -1: 
        return 'Online Interface Fan'
    else:
        return 'Online Interface Super Fan'

df2['Perference'] = df2.apply(func, axis='columns')

In [82]:
#Get the output using value_counts
output = (df2['Perference'].value_counts(normalize=True).mul(100).round(1).astype(str) + '%').to_frame()

In [83]:
output

Unnamed: 0,Perference
Neutral,63.7%
Mobile App Fan,16.4%
Online Interface Fan,10.7%
Online Interface Super Fan,6.6%
Mobile App SuperFan,2.6%
