In [1]:
import polars as pl

In [57]:
df = pl.read_csv('data/DSB Customer Survey.csv')

In [58]:
df.sort('Customer ID').head()

Customer ID,Mobile App - Ease of Use,Mobile App - Ease of Access,Mobile App - Navigation,Mobile App - Likelihood to Recommend,Mobile App - Overall Rating,Online Interface - Ease of Use,Online Interface - Ease of Access,Online Interface - Navigation,Online Interface - Likelihood to Recommend,Online Interface - Overall Rating
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
101646,3,5,2,4,5,2,4,3,4,2
101650,1,4,2,2,2,4,5,1,2,5
105088,5,1,5,3,1,5,5,5,2,5
109306,1,4,2,1,2,1,2,1,4,1
110719,3,3,5,1,3,5,5,1,3,1


In [115]:
mobile_cols = [col for col in df.columns if 'Mobile' in col]
online_cols = [col for col in df.columns if 'Online' in col]

mobile = (
    df.melt(
        id_vars='Customer ID', 
        value_vars=online_cols, 
        variable_name='Functionality', 
        value_name='Online Score'
    )
    .with_columns(
        pl.col('Functionality').str.split(' - ').list.get(1),
        pl.lit('Online').alias('Platform')
    )
)

online = (
    df.melt(
        id_vars='Customer ID', 
        value_vars=mobile_cols, 
        variable_name='Functionality', 
        value_name='Mobile Score'
    )
    .with_columns(
        pl.col('Functionality').str.split(' - ').list.get(1),
        pl.lit('Mobile').alias('Platform')
    )
)

cleaned = (
    mobile
    .join(online, how='inner', on=['Customer ID', 'Functionality'])
    .filter(
        pl.col('Functionality').ne('Overall Rating')
    )
    .with_columns(
        pl.col('Mobile Score').mean().over('Customer ID', 'Platform').alias('Mobile Avg Rating'),
        pl.col('Online Score').mean().over('Customer ID', 'Platform').alias('Online Avg Rating')
    )
    .with_columns(
        (pl.col('Online Avg Rating') - pl.col('Mobile Avg Rating')).alias('Rating Diff')
    )
    .with_columns(
        pl.when(pl.col('Rating Diff').le(-2)).then(pl.lit('Mobile App Superfan'))
        .when(pl.col('Rating Diff').le(-1)).then(pl.lit('Mobile App Fan'))
        .when(pl.col('Rating Diff').ge(2)).then(pl.lit('Online Interface Superfan'))
        .when(pl.col('Rating Diff').ge(1)).then(pl.lit('Online Interface Fan'))
        .when(pl.col('Rating Diff').is_between(-1, 1)).then(pl.lit('Neutral'))
        .alias('Preference')
    )
    .group_by('Preference')
    .agg(pl.count())
    .select(
        'Preference',
        (pl.col('count') / pl.sum('count') * 100).round(1).alias('% of Total')
    )
)
print(cleaned.head())

shape: (5, 3)
┌───────────────────────────┬───────┬────────────┐
│ Preference                ┆ count ┆ % of Total │
│ ---                       ┆ ---   ┆ ---        │
│ str                       ┆ u32   ┆ f64        │
╞═══════════════════════════╪═══════╪════════════╡
│ Neutral                   ┆ 1956  ┆ 63.7       │
│ Online Interface Fan      ┆ 452   ┆ 14.7       │
│ Online Interface Superfan ┆ 80    ┆ 2.6        │
│ Mobile App Superfan       ┆ 80    ┆ 2.6        │
│ Mobile App Fan            ┆ 504   ┆ 16.4       │
└───────────────────────────┴───────┴────────────┘
