Requirements

1. Input the data
2. Reshape the data so we have 5 rows for each customer, with responses for the Mobile App and Online Interface being in separate fields on the same row
3. Clean the question categories so they don't have the platform in front of them
    e.g. Mobile App - Ease of Use should be simply Ease of Use
4. Exclude the Overall Ratings, these were incorrectly calculated by the system
5. Calculate the Average Ratings for each platform for each customer 
6. Calculate the difference in Average Rating between Mobile App and Online Interface for each customer
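7. Categorise customers (difference = Mobile App average minus Online Interface average) as being:
    Mobile App Superfans if the difference is greater than or equal to 2 in the Mobile App's favour
    Mobile App Fans if the difference is greater than or equal to 1 (but less than 2) in the Mobile App's favour
    Online Interface Fan if the difference is greater than or equal to 1 (but less than 2) in the Online Interface's favour
    Online Interface Superfan if the difference is greater than or equal to 2 in the Online Interface's favour
    Neutral if the difference is less than 1 in either direction (i.e. strictly between -1 and 1)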
8. Calculate the Percent of Total customers in each category, rounded to 1 decimal place
9. Output the data


In [2]:
import pandas as pd

In [3]:
# Input Data
# Read the raw survey export into a DataFrame; each row is one customer and
# each rating column is named "<platform> - <question>".

df = pd.read_csv('Preppin Data Inputs/DSB Customer Survery.csv')

In [4]:
df

Unnamed: 0,Customer ID,Mobile App - Ease of Use,Mobile App - Ease of Access,Mobile App - Navigation,Mobile App - Likelihood to Recommend,Mobile App - Overall Rating,Online Interface - Ease of Use,Online Interface - Ease of Access,Online Interface - Navigation,Online Interface - Likelihood to Recommend,Online Interface - Overall Rating
0,535084,2,1,5,4,1,4,4,5,2,3
1,250892,3,5,4,4,2,5,5,2,4,3
2,544191,5,3,4,4,1,3,3,2,3,1
3,949343,2,5,4,3,1,1,4,3,5,1
4,915305,3,1,2,1,1,4,2,4,3,2
...,...,...,...,...,...,...,...,...,...,...,...
763,374015,5,1,4,5,2,2,1,5,5,1
764,144922,5,5,3,2,4,4,4,4,4,3
765,421323,1,4,1,1,3,3,5,1,1,2
766,707580,1,1,5,4,5,5,4,2,4,1


In [5]:
# Narrow the survey down to the customer key plus the five Mobile App ratings
df_mobile = df.loc[
    :,
    [
        'Customer ID',
        'Mobile App - Ease of Use',
        'Mobile App - Ease of Access',
        'Mobile App - Navigation',
        'Mobile App - Likelihood to Recommend',
        'Mobile App - Overall Rating',
    ],
]

df_mobile

Unnamed: 0,Customer ID,Mobile App - Ease of Use,Mobile App - Ease of Access,Mobile App - Navigation,Mobile App - Likelihood to Recommend,Mobile App - Overall Rating
0,535084,2,1,5,4,1
1,250892,3,5,4,4,2
2,544191,5,3,4,4,1
3,949343,2,5,4,3,1
4,915305,3,1,2,1,1
...,...,...,...,...,...,...
763,374015,5,1,4,5,2
764,144922,5,5,3,2,4
765,421323,1,4,1,1,3
766,707580,1,1,5,4,5


In [6]:
# Reshape the data so we have 5 rows for each customer:
# unpivot the Mobile App columns so the original column name lands in
# 'Mobile App' and the score in 'Mobile App Value'.

df_mobile = pd.melt(
    df_mobile,
    id_vars=['Customer ID'],
    value_vars=[
        'Mobile App - Ease of Use',
        'Mobile App - Ease of Access',
        'Mobile App - Navigation',
        'Mobile App - Likelihood to Recommend',
        'Mobile App - Overall Rating',
    ],
    var_name='Mobile App',
    value_name='Mobile App Value',
)

In [7]:
# Keep only the text after the hyphen, e.g.
# 'Mobile App - Ease of Use' -> ' Ease of Use'
# (the space that followed the hyphen is retained).
df_mobile['Mobile App'] = df_mobile['Mobile App'].str.split('-').str[1]

In [8]:
df_mobile

Unnamed: 0,Customer ID,Mobile App,Mobile App Value
0,535084,Ease of Use,2
1,250892,Ease of Use,3
2,544191,Ease of Use,5
3,949343,Ease of Use,2
4,915305,Ease of Use,3
...,...,...,...
3835,374015,Overall Rating,2
3836,144922,Overall Rating,4
3837,421323,Overall Rating,3
3838,707580,Overall Rating,5


In [9]:
# Narrow the survey down to the customer key plus the five Online Interface ratings
df_online = df.loc[
    :,
    [
        'Customer ID',
        'Online Interface - Ease of Use',
        'Online Interface - Ease of Access',
        'Online Interface - Navigation',
        'Online Interface - Likelihood to Recommend',
        'Online Interface - Overall Rating',
    ],
]

df_online

Unnamed: 0,Customer ID,Online Interface - Ease of Use,Online Interface - Ease of Access,Online Interface - Navigation,Online Interface - Likelihood to Recommend,Online Interface - Overall Rating
0,535084,4,4,5,2,3
1,250892,5,5,2,4,3
2,544191,3,3,2,3,1
3,949343,1,4,3,5,1
4,915305,4,2,4,3,2
...,...,...,...,...,...,...
763,374015,2,1,5,5,1
764,144922,4,4,4,4,3
765,421323,3,5,1,1,2
766,707580,5,4,2,4,1


In [10]:
# Unpivot the Online Interface columns: one row per customer per question,
# with the original column name in 'Online Interface' and the score in
# 'Online Interface Value'.
# 'Customer ID' is the identifier (id_vars), so it is not repeated in
# value_vars; only the five rating columns are melted, mirroring the
# Mobile App melt.
df_online = df_online.melt(
    id_vars='Customer ID',
    value_vars=[
        'Online Interface - Ease of Use',
        'Online Interface - Ease of Access',
        'Online Interface - Navigation',
        'Online Interface - Likelihood to Recommend',
        'Online Interface - Overall Rating',
    ],
    var_name='Online Interface',
    value_name='Online Interface Value',
)

In [11]:
# Keep only the text after the hyphen, e.g.
# 'Online Interface - Ease of Use' -> ' Ease of Use'
# (the space that followed the hyphen is retained).
df_online['Online Interface'] = df_online['Online Interface'].str.split('-').str[1]

In [12]:
df_online

Unnamed: 0,Customer ID,Online Interface,Online Interface Value
0,535084,Ease of Use,4
1,250892,Ease of Use,5
2,544191,Ease of Use,3
3,949343,Ease of Use,1
4,915305,Ease of Use,4
...,...,...,...
3835,374015,Overall Rating,1
3836,144922,Overall Rating,3
3837,421323,Overall Rating,2
3838,707580,Overall Rating,1


In [13]:
# Put both platforms' answers for the same customer and question on one row.
# Splitting the original column names on '-' leaves the space that followed
# the hyphen in front of each label, so trim both key columns first: the
# requirement is a plain 'Ease of Use', and trimming both sides here keeps
# the join keys identical.
df_mobile['Mobile App'] = df_mobile['Mobile App'].str.strip()
df_online['Online Interface'] = df_online['Online Interface'].str.strip()

df = df_mobile.merge(
    right=df_online,
    how='inner',
    left_on=['Customer ID', 'Mobile App'],
    right_on=['Customer ID', 'Online Interface'],
)

In [14]:
# Checking we have the 5 rows per customer

df.sort_values('Customer ID')

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value
1031,101646,Ease of Access,5,Ease of Access,4
3335,101646,Overall Rating,5,Overall Rating,2
263,101646,Ease of Use,3,Ease of Use,2
2567,101646,Likelihood to Recommend,4,Likelihood to Recommend,4
1799,101646,Navigation,2,Navigation,3
...,...,...,...,...,...
3428,998229,Overall Rating,2,Overall Rating,4
2660,998229,Likelihood to Recommend,3,Likelihood to Recommend,2
356,998229,Ease of Use,4,Ease of Use,5
1892,998229,Navigation,5,Navigation,4


In [15]:
# Exclude Overall Rating
# Drop every row whose question label contains 'Overall Rating'; both
# platforms share the same label on a row, so checking 'Mobile App' suffices.

df = df.loc[lambda frame: ~frame['Mobile App'].str.contains('Overall Rating')]

df.sort_values(by='Customer ID')

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value
1799,101646,Navigation,2,Navigation,3
1031,101646,Ease of Access,5,Ease of Access,4
263,101646,Ease of Use,3,Ease of Use,2
2567,101646,Likelihood to Recommend,4,Likelihood to Recommend,4
1606,101650,Navigation,2,Navigation,1
...,...,...,...,...,...
907,997926,Ease of Access,3,Ease of Access,3
1892,998229,Navigation,5,Navigation,4
356,998229,Ease of Use,4,Ease of Use,5
2660,998229,Likelihood to Recommend,3,Likelihood to Recommend,2


In [16]:
# Calculate the Average Ratings for each platform for each customer.
# The two rating columns are selected explicitly before aggregating:
# GroupBy.mean() takes `numeric_only` as its first argument, not a list of
# columns, so passing the list to mean() only worked by being truthy.

df = df.groupby('Customer ID', as_index=False)[
    ['Mobile App Value', 'Online Interface Value']
].mean()

df

Unnamed: 0,Customer ID,Mobile App Value,Online Interface Value
0,101646,3.50,3.25
1,101650,2.25,3.00
2,105088,3.50,4.25
3,109306,2.00,2.00
4,110719,3.00,3.50
...,...,...,...
763,994742,3.00,3.50
764,996508,2.50,3.00
765,997785,3.75,3.00
766,997926,3.50,3.75


In [17]:
# Calculate the difference in Average Rating between Mobile App and Online Interface for each customer
# Positive values mean the Mobile App averaged higher; negative values mean
# the Online Interface averaged higher.

df['diff'] = df['Mobile App Value'] - df['Online Interface Value']

In [18]:
df

Unnamed: 0,Customer ID,Mobile App Value,Online Interface Value,diff
0,101646,3.50,3.25,0.25
1,101650,2.25,3.00,-0.75
2,105088,3.50,4.25,-0.75
3,109306,2.00,2.00,0.00
4,110719,3.00,3.50,-0.50
...,...,...,...,...
763,994742,3.00,3.50,-0.50
764,996508,2.50,3.00,-0.50
765,997785,3.75,3.00,0.75
766,997926,3.50,3.75,-0.25


In [19]:
# Categorise customers

def condition(x):
    """Map a rating difference to a customer preference label.

    ``x`` is the Mobile App average minus the Online Interface average.
    A gap of at least 1 in either direction makes the customer a fan of
    that platform, a gap of at least 2 a superfan; anything else
    (including a missing/NaN difference, which fails every comparison)
    is 'Neutral'.
    """
    # Mobile App side: strongest tier decided within the branch
    if x >= 1:
        return 'Mobile App Superfan' if x >= 2 else 'Mobile App Fans'

    # Online Interface side: same thresholds, mirrored
    if x <= -1:
        return 'Online Interface Superfan' if x <= -2 else 'Online Interface Fan'

    return 'Neutral'


df['Preference'] = df['diff'].apply(condition)

In [20]:
df

Unnamed: 0,Customer ID,Mobile App Value,Online Interface Value,diff,Preference
0,101646,3.50,3.25,0.25,Neutral
1,101650,2.25,3.00,-0.75,Neutral
2,105088,3.50,4.25,-0.75,Neutral
3,109306,2.00,2.00,0.00,Neutral
4,110719,3.00,3.50,-0.50,Neutral
...,...,...,...,...,...
763,994742,3.00,3.50,-0.50,Neutral
764,996508,2.50,3.00,-0.50,Neutral
765,997785,3.75,3.00,0.75,Neutral
766,997926,3.50,3.75,-0.25,Neutral


In [21]:
# Calculate the Percent of Total customers in each category
# (customers per 'Preference' divided by all customers, scaled to a
# percentage; rounding to 1 decimal place happens in a later cell)

df_percent = 100 * (
    df.groupby(by='Preference')['Customer ID'].count()
    / df['Customer ID'].count()
)

In [22]:
# Move the 'Preference' labels out of the index into a regular column
df_percent = df_percent.reset_index()

In [23]:
# The percentage column still carries the name 'Customer ID' from the count;
# give both columns their output names
df_percent.columns = ['Preference', '% of Total']

In [24]:
# Round the percentages to 1 decimal place
df_percent['% of Total'] = df_percent['% of Total'].round(1)

In [25]:
df_percent

Unnamed: 0,Preference,% of Total
0,Mobile App Fans,16.4
1,Mobile App Superfan,2.6
2,Neutral,63.7
3,Online Interface Fan,14.7
4,Online Interface Superfan,2.6


In [26]:
# Output the data
# index=False leaves the DataFrame's row index out of the CSV.

df_percent.to_csv('Preppin Data Outputs/pd2023wk6_output.csv', index=False)