title: Metal amazon cashback
author: Fabio Schmidt-Fischbach   
date: 2021-1-10  
region: EU   
link: https://docs.google.com/presentation/d/1LYABYZPOAjHKwcSeuy7LNpsZ6UlCKCACqSaGV6JNyxM/edit?usp=sharing     
summary: Offer 100% cashback on Amazon Prime subscriptions to all Metal users in Spain & Italy to increase retention and growth. The initiative started on 24.08.2020. None of the analysis is causal. We are analysing time trends - this initiative is not happening in isolation and not all changes are causally attributable to it. Sign-up (SU) and upgrade numbers look good, but the high marketing spend in those months puts them into perspective. There is no conclusive evidence of growth outperformance. So far, it does not look like the partnership accelerated growth disproportionately. Further, first retention results for new cohorts are rather negative, but overall retention across all users (e.g. collected revenue per fee) grew.
tags: memberships, amazon, retention, growth, partnership, metal, italy, spain   

In [1]:
import psycopg2
import psycopg2.extras
import pandas as pd
import time
import math
import os
import numpy as np
import altair as alt


def exponential_backoff(fn):
    """Retry ``fn`` with exponentially growing pauses whenever it raises.

    The wrapped callable is attempted up to five times. Every attempt is
    preceded by a one second pause. After a failed attempt the error is
    printed and, as long as another attempt remains, the wrapper sleeps
    ``4 ** (attempt + 2)`` seconds (16, 64, 256, 1024) before retrying.

    Args:
        fn: Callable to wrap. Positional and keyword arguments are passed
            through unchanged.

    Returns:
        A wrapper that returns whatever ``fn`` returns, or ``None`` when all
        five attempts raised.
    """
    # Local import: keeps this definition self-contained within its cell.
    import functools

    max_attempts = 5

    @functools.wraps(fn)  # preserve fn's name/docstring on the wrapper
    def wrapper(*args, **kwargs):
        for attempt in range(max_attempts):
            try:
                time.sleep(1)
                print("Calling DB.")
                return fn(*args, **kwargs)
            except Exception as e:
                if attempt == max_attempts - 1:
                    # No retry follows, so sleeping again would only delay
                    # the caller (previously this waited 4096 seconds).
                    print(f"Giving up after {max_attempts} attempts! Because of:")
                    print(e)
                    break
                # Exponential backoff in case of rate limiting
                sleep_int = math.pow(4, attempt + 2)
                print(f"Sleeping {sleep_int} seconds! Because of:")
                print(e)
                time.sleep(sleep_int)
                print(f"Slept {sleep_int} seconds!")
        return None

    return wrapper


def connect_redshift():
    """Open a psycopg2 connection to the Redshift warehouse.

    Host, port, database name and session options are fixed in this
    function; the user name and password are hard-coded as empty strings.

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        psycopg2.OperationalError: Re-raised after the failure is printed.
    """
    settings = {
        "host": "n26-dwh.cfxsmcyyfcch.eu-central-1.redshift.amazonaws.com",
        "user": "",
        "port": 5439,
        "password": "",
        "dbname": "n26",
        "options": "-c statement_timeout=500000",
    }
    try:
        return psycopg2.connect(**settings)
    except psycopg2.OperationalError as e:
        print("Unable to connect!\n{}".format(e))
        raise


def cursor_redshift(conn):
    """Return a cursor on ``conn`` created with ``RealDictCursor`` as factory."""
    dict_cursor_factory = psycopg2.extras.RealDictCursor
    return conn.cursor(cursor_factory=dict_cursor_factory)


@exponential_backoff
def query_to_df(query):
    """Run ``query`` against Redshift and return the rows as a DataFrame.

    A fresh connection is opened for every call. On success the transaction
    is committed; on any error it is rolled back and the error is re-raised
    so that the ``exponential_backoff`` decorator can handle the retry. The
    connection is closed in every case, including when the rollback itself
    or the cursor creation fails.

    Args:
        query: SQL text to execute.

    Returns:
        A ``pandas.DataFrame`` built from the fetched rows.

    Raises:
        Exception: Whatever was raised while creating the cursor, executing
            the query, fetching or committing.
    """
    conn = connect_redshift()
    try:
        cur = cursor_redshift(conn)
        cur.execute(query)
        rows = cur.fetchall()
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        # Runs on success and on failure, so the connection never leaks.
        conn.close()

    return pd.DataFrame(rows)

  """)


In [8]:
import os
import time
from decimal import *

# Pull every transaction whose reference text is
# 'N26 Metal: Amazon Prime Cashback' and that was created on or after
# 2020-08-01. One row per transaction: the month it was created in, its
# bank_balance_impact, and the id of the account owner (the earliest-created
# OWNER row per account, as explained in the SQL comment below).
query_raw = """
        select date_trunc('month', zt.created) as month, 
		bank_balance_impact, 
		cmd_users.id as user_id 
from etl_reporting.zr_transaction as zt 
LEFT JOIN (
    select 
        account_id,
        user_created,
        /* If there is only one account owner, this will always be 1.
        If there are multiple potential account owners we assume the
        first one who was created is the actual first one. */
        ROW_NUMBER() over (partition by account_id order by created) as rn
    from cr_user_account as cua 
    where coalesce(cua.user_role, 'OWNER') = 'OWNER'
) as cua on cua.account_id = zt.account_id and rn = 1
inner join cmd_users using (user_created)
where reference_text = 'N26 Metal: Amazon Prime Cashback' 
        and zt.created >= '2020-08-01'
"""

# Run the query and cache the result on disk. The file is gzip-compressed even
# though it is named *.csv, so it has to be read back with compression="gzip".
df = query_to_df(query_raw)
df.to_csv("cashbacks.csv", index=False, compression="gzip", chunksize=100000)

In [64]:
# One row per cohort month and Metal subscription (METAL_CARD_MONTHLY or
# BUSINESS_METAL) that is valid at the month's end_time, for users whose
# country_tnc_legal is ITA or ESP and end_time between 2020-07-01 and today.
#   mau   - 1 when a '1_tx_35' activity window covers end_time, else 0.
#   group - 'Post' when the subscription started on or after 2020-08-24
#           (the launch date used throughout this notebook), else 'Pre'.
query_raw = """
        select month,
                start_time, 
                dbt.zrh_users.user_id,
                dbt.zrh_user_product.product_id,
                case when act.user_created is not null then 1 else 0 end as mau,
                dbt.zrh_users.tnc_country_group,
                enter_reason, 
                subscription_valid_from, 
                subscription_valid_until, 
                case when subscription_valid_from >= '2020-08-24' then 'Post' else 'Pre' end as group
        from dwh_cohort_months 
        left join dbt.zrh_user_product 
            on end_time between subscription_valid_from and subscription_valid_until 
        inner join dbt.zrh_users using (user_created) 
        left join dbt.zrh_user_activity_txn as act on act.user_created = dbt.zrh_users.user_created
            and end_time between activity_start and activity_end and activity_type = '1_tx_35'
        where dbt.zrh_user_product.product_id in ('METAL_CARD_MONTHLY','BUSINESS_METAL')
        and end_time between '2020-07-01' and current_date
        and country_tnc_legal in ('ITA','ESP')
"""

# Run the query and cache the result on disk (gzip-compressed despite the
# .csv name, so it must be read back with compression="gzip").
users = query_to_df(query_raw)
users.to_csv("users.csv", index=False, compression="gzip", chunksize=100000)

Calling DB.


## How many people were reimbursed, and how much money was reimbursed?


In [40]:
# Monthly reimbursement volume: summed bank_balance_impact and number of
# distinct recipients, drawn as two lines on one shared axis.
df = pd.read_csv("cashbacks.csv", compression="gzip")

monthly_totals = df.groupby(["month"]).agg(
    {"bank_balance_impact": "sum", "user_id": "nunique"}
)
df = monthly_totals.reset_index().melt(id_vars="month")

# Swap the raw column names for the labels shown in the chart legend.
legend_labels = {
    "user_id": "# of users",
    "bank_balance_impact": "Total Euro reimbursed",
}
df["variable"] = df["variable"].replace(legend_labels)

month_axis = alt.X("month:T", axis=alt.Axis(title="Month"))
value_axis = alt.Y("value:Q", axis=alt.Axis(title="Adoption"))

alt.Chart(df).mark_line().encode(
    x=month_axis, y=value_axis, color="variable:N"
).properties(width=500, height=500)

In [68]:
# Share of monthly active Metal users (mau == 1) that received a cashback in
# a given month, per enter_reason plus an overall "TOTAL" line.
df = pd.read_csv("cashbacks.csv", compression="gzip")
users = pd.read_csv("users.csv", compression="gzip")

# Collapse cashbacks to one row per user and month. The month is cut to its
# first 10 characters so it can be matched against start_time as text.
df["month"] = df["month"].astype(str).str.slice(stop=10)
df = df.groupby(["user_id", "month"])["bank_balance_impact"].agg("sum").reset_index()

users["start_time"] = users["start_time"].astype(str)

# Left join keeps user-months without a cashback (bank_balance_impact is NaN).
# Both frames carry a "month" column, so pandas suffixes them month_x/month_y.
users = users.merge(
    df, left_on=["user_id", "start_time"], right_on=["user_id", "month"], how="left"
)

# .copy() so the column added below is written to an independent frame
# instead of a slice of the merged one.
users = users.loc[users["mau"] == 1, :].copy()

# check what % of mau got cashback: 1 when the user-month matched a cashback
# row, 0 otherwise.
users["cashback"] = users["bank_balance_impact"].notnull().astype(int)

# aggregate
u = users.groupby(["month_x", "enter_reason"])["cashback"].agg("mean").reset_index()
u2 = users.groupby(["month_x"])["cashback"].agg("mean").reset_index()
u2["enter_reason"] = "TOTAL"

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows.
u = pd.concat([u, u2])

alt.Chart(u).mark_line().encode(
    x=alt.X("month_x:T", axis=alt.Axis(title="Month")),
    y=alt.Y(
        "cashback:Q",
        axis=alt.Axis(format="%", title="% of MAUs that received cashback"),
    ),
    color="enter_reason:N",
).properties(width=500, height=500)

In [72]:
# Split of cashback recipients by the `group` column of users.csv, i.e.
# whether their subscription started before or after the launch.
df = pd.read_csv("cashbacks.csv", compression="gzip")
users = pd.read_csv("users.csv", compression="gzip")

# One row per user and month, keyed on the first 10 characters of the month.
df["month"] = df["month"].astype(str).str[:10]
df = df.groupby(["user_id", "month"], as_index=False)["bank_balance_impact"].sum()

users["start_time"] = users["start_time"].astype(str)

# Inner join: only user-months with a matching cashback row survive.
users = pd.merge(
    users,
    df,
    how="inner",
    left_on=["user_id", "start_time"],
    right_on=["user_id", "month"],
)

# Distinct recipients per group, then expressed as a percentage of the total.
users = users.groupby(["group"])["user_id"].nunique().reset_index()
recipient_total = users["user_id"].sum()
users["user_id"] = 100 * users["user_id"] / recipient_total

group_axis = alt.X(
    "group:N",
    axis=alt.Axis(title="Subscription start before or after launch of initiative"),
)
share_axis = alt.Y("user_id:Q", axis=alt.Axis(title="% of cashback recipients"))

alt.Chart(users).mark_bar().encode(x=group_axis, y=share_axis).properties(
    width=500, height=500
)

In [None]:
# Percentage of cashback recipients per `group` value (subscription start
# before vs. after the launch), shown as a bar chart.
cashback_rows = pd.read_csv("cashbacks.csv", compression="gzip")
users = pd.read_csv("users.csv", compression="gzip")

# Total cashback per user and month; month is reduced to its first 10 chars.
cashback_rows["month"] = cashback_rows["month"].astype(str).str.slice(stop=10)
per_user_month = cashback_rows.groupby(["user_id", "month"])["bank_balance_impact"]
df = per_user_month.sum().reset_index()

users["start_time"] = users["start_time"].astype(str)

# Keep only the user-months that actually have a cashback (inner join).
users = users.merge(
    df,
    how="inner",
    left_on=["user_id", "start_time"],
    right_on=["user_id", "month"],
)

# Count distinct users per group and rescale the counts to percentages.
users = users.groupby(["group"])["user_id"].nunique().reset_index()
users["user_id"] = users["user_id"] * 100 / users["user_id"].sum()

bars = alt.Chart(users).mark_bar()
bars.encode(
    x=alt.X(
        "group:N",
        axis=alt.Axis(title="Subscription start before or after launch of initiative"),
    ),
    y=alt.Y("user_id:Q", axis=alt.Axis(title="% of cashback recipients")),
).properties(width=500, height=500)

In [83]:
# Share of user-months with a cashback, per subscription-start cohort
# (year-month) and enter_reason.
df = pd.read_csv("cashbacks.csv", compression="gzip")
users = pd.read_csv("users.csv", compression="gzip")

# One cashback row per user and month; month is cut to its first 10
# characters so it can be matched against start_time as text.
df["month"] = df["month"].astype(str).str.slice(stop=10)
df = df.groupby(["user_id", "month"])["bank_balance_impact"].agg("sum").reset_index()

users["start_time"] = users["start_time"].astype(str)

# Left join keeps user-months without a cashback (bank_balance_impact is NaN).
users = users.merge(
    df, left_on=["user_id", "start_time"], right_on=["user_id", "month"], how="left"
)

# Restrict to subscriptions that started in 2020 or later. This filter used
# to be evaluated without assigning the result, so it was never applied.
users = users.loc[users["subscription_valid_from"] >= "2020-01-01", :].copy()

# Cohort label: first 7 characters of the subscription start (year-month).
users["subscription_valid_from"] = (
    users["subscription_valid_from"].astype(str).str.slice(stop=7)
)

# 1 when the user-month matched a cashback row, 0 otherwise.
users["cashback"] = users["bank_balance_impact"].notnull().astype(int)

users = (
    users.groupby(["subscription_valid_from", "enter_reason"])["cashback"]
    .agg("mean")
    .reset_index()
)


alt.Chart(users).mark_line().encode(
    x=alt.X(
        "subscription_valid_from:N", axis=alt.Axis(title="Subscription start (cohorts)")
    ),
    y=alt.Y(
        "cashback:Q",
        axis=alt.Axis(format="%", title="% of cohort that received cashback"),
    ),
    color="enter_reason:N",
).properties(width=500, height=500, title="% of cohort (x-axis) that received cashback")

In [91]:
# Subscription fee payments of Metal subscriptions (METAL_CARD_MONTHLY or
# BUSINESS_METAL) for users with country_tnc_legal ESP or ITA.
#   cohort - month in which the subscription became valid; only cohorts from
#            2020-01-01 onwards are kept.
# Only fees for which at least 35 days have passed since the first day of the
# month following hypothetical_fee_date are included.
query_raw = """
        select user_id, 
                date_trunc('month', subscription_valid_from) as cohort, 
                enter_reason, 
                market, 
                zpp.product_id, 
                payment_no, 
                paid,
                amount_cents, 
                days_delay, 
                arrears, 
                charged
        from dbt.zrh_subscription_payments as zpp
        inner join dbt.zrh_users using (user_created) 
        where country_tnc_legal in ('ESP','ITA')
        and zpp.product_id in ('METAL_CARD_MONTHLY','BUSINESS_METAL')
        and dateadd('day',35, dateadd('month',1,date_trunc('month',hypothetical_fee_date))) <= current_date 
        and cohort >= '2020-01-01'
        
"""

# Run the query and cache the result on disk (gzip-compressed despite the
# .csv name, so it must be read back with compression="gzip").
charges = query_to_df(query_raw)
charges.to_csv("charges.csv", index=False, compression="gzip", chunksize=100000)

Calling DB.


In [99]:
# Payment discipline per cohort: share of subscription fees paid within 35
# days, split by whether the user appears in the cashback extract at all.
# One facet per payment number (first four payments only).
df = pd.read_csv("cashbacks.csv", compression="gzip")
charges = pd.read_csv("charges.csv", compression="gzip")

# Flag every user with at least one cashback. The flag used to be called
# "chargeback", which mislabelled the legend: it is built from cashbacks.csv.
df["cashback"] = 1
df = df.groupby(["user_id"])["cashback"].max().reset_index()

# Users without a cashback get flag 0.
# NOTE(review): fillna(0) runs on the whole merged frame, so missing values in
# every other column also become 0 - confirm this is intended.
charges = charges.merge(df, on=["user_id"], how="left").fillna(0)

# Cohort label: first 7 characters of the cohort month (year-month).
charges["cohort"] = charges["cohort"].astype(str).str.slice(stop=7)


# A fee with a delay of 35 days or more is treated as not paid.
charges.loc[charges["days_delay"] >= 35, "paid"] = False

charges = (
    charges.groupby(["cohort", "payment_no", "cashback"])["paid"]
    .agg("mean")
    .reset_index()
)

# The title previously repeated the one from the cashback-share chart; it now
# names the metric that is actually plotted.
alt.Chart(charges.loc[charges["payment_no"] <= 4, :]).mark_line().encode(
    x=alt.X("cohort:N", axis=alt.Axis(title="Cohorts")),
    y=alt.Y(
        "paid:Q",
        axis=alt.Axis(format="%", title="% of cohort that paid within 35 days"),
    ),
    color="cashback:N",
).properties(
    width=300,
    height=200,
    title="% of cohort (x-axis) that paid within 35 days, by cashback receipt",
).facet(
    facet="payment_no", columns=2
)

In [102]:
# Payment discipline per cohort, restricted to subscriptions whose
# enter_reason is SIGNUP or UPGRADED: share of fees paid within 35 days, split
# by whether the user appears in the cashback extract at all.
df = pd.read_csv("cashbacks.csv", compression="gzip")
charges = pd.read_csv("charges.csv", compression="gzip")

# Flag every user with at least one cashback. The flag used to be called
# "chargeback", which mislabelled the legend: it is built from cashbacks.csv.
df["cashback"] = 1
df = df.groupby(["user_id"])["cashback"].max().reset_index()

# Users without a cashback get flag 0.
# NOTE(review): fillna(0) runs on the whole merged frame, so missing values in
# every other column also become 0 - confirm this is intended.
charges = charges.merge(df, on=["user_id"], how="left").fillna(0)

# Cohort label: first 7 characters of the cohort month (year-month).
charges["cohort"] = charges["cohort"].astype(str).str.slice(stop=7)


# A fee with a delay of 35 days or more is treated as not paid.
charges.loc[charges["days_delay"] >= 35, "paid"] = False

# isin() already yields a boolean mask; no comparison against True needed.
charges = charges.loc[charges["enter_reason"].isin(["SIGNUP", "UPGRADED"]), :]

charges = (
    charges.groupby(["cohort", "payment_no", "cashback"])["paid"]
    .agg("mean")
    .reset_index()
)

# The title previously repeated the one from the cashback-share chart; it now
# names the metric that is actually plotted.
alt.Chart(charges.loc[charges["payment_no"] <= 4, :]).mark_line().encode(
    x=alt.X("cohort:N", axis=alt.Axis(title="Cohorts")),
    y=alt.Y(
        "paid:Q",
        axis=alt.Axis(format="%", title="% of cohort that paid within 35 days"),
    ),
    color="cashback:N",
).properties(
    width=300,
    height=200,
    title="% of cohort (x-axis) that paid within 35 days, by cashback receipt",
).facet(
    facet="payment_no", columns=2
)

# How many users changed their behaviour? 


In [None]:
# Summed balance_eur per user and month offset around the launch date.
#   cashbacks  - users with at least one 'N26 Metal: Amazon Prime Cashback'
#                transaction since 2020-08-01, with the time of their first one.
#   others     - ESP/ITA Metal users without any cashback; their
#                first_cashback is set to the placeholder date 2020-08-24.
#   all_sample - union of both groups.
# The final select keeps users whose kyc_first_completed lies at least six
# months before first_cashback and sums balance_eur for months that are
# between -3 and +3 months away from 2020-08-24 (`diff`).
# NOTE(review): this cell only defines the query; nothing in view executes it.
# The next cell reads balances.csv, presumably an export of this result -
# TODO confirm.
query_raw = """



with cashbacks as ( 
    select cmd_users.id as user_id,
           'cashback' as label, 
        min(zt.created) as first_cashback
    from etl_reporting.zr_transaction as zt 
    LEFT JOIN (
        select 
            account_id,
            user_created,
            ROW_NUMBER() over (partition by account_id order by created) as rn
        from cr_user_account as cua 
        where coalesce(cua.user_role, 'OWNER') = 'OWNER'
    ) as cua on cua.account_id = zt.account_id and rn = 1
    inner join cmd_users using (user_created)
    where reference_text = 'N26 Metal: Amazon Prime Cashback' 
            and zt.created >= '2020-08-01'
    group by 1,2 
), others as (

select user_id  , 'no cashback' as label, '2020-08-24'::date as first_cashback
from dbt.zrh_users
left join cashbacks using (user_id)
where product_id in ('BUSINESS_METAL','METAL_CARD_MONTHLY') and country_tnc_legal in ('ESP','ITA')
    and cashbacks.user_id is null 
),
all_sample as ( 

select * 
from others 

union all 

select * 
from cashbacks

)

select all_sample.user_id, 
        first_cashback, 
        country_tnc_legal, 
        case when product_id like 'BUSINESS%' then 'Business' else 'Personal' end as type, 
        datediff('month', '2020-08-24'::date, TO_DATE(month::text, 'YYYY:MM')) as diff, 
        sum(balance_eur)
from all_sample 

inner join dbt.zrh_users as zu 
    on zu.user_id = all_sample.user_id 
    and datediff('month', kyc_first_completed::date, first_cashback::date) >= 6 -- older than 6 months. 
left join dbt.mmb_monthly_balance_aud as mmb
    on datediff('months', '2020-08-24'::date, TO_DATE(month::text, 'YYYY:MM')) <= 3 
    and datediff('months', '2020-08-24'::date, TO_DATE(month::text, 'YYYY:MM')) >= -3
    and mmb.user_created = zu.user_created
group by 1,2,3,4,5

"""

In [37]:
# Cumulative distribution of the `sum` column (rounded to the nearest 100)
# for cashback vs. non-cashback users, one panel per `diff` value.
df = pd.read_csv("balances.csv")

# Rows carrying this exact first_cashback timestamp are flagged 0, all other
# rows 1.
is_placeholder = df["first_cashback"].astype(str) == "2020-08-24 02:00:00"
df["cashback"] = (~is_placeholder).astype("int64")

df["sum"] = df["sum"].round(-2)

# Distinct users per rounded value, cashback flag and diff.
df = df.groupby(["sum", "cashback", "diff"])["user_id"].nunique().reset_index()

# Convert the counts to percentages within each (cashback, diff) group, then
# accumulate them in row order.
group_totals = df.groupby(["cashback", "diff"])["user_id"].transform("sum")
df["perc"] = 100 * df["user_id"] / group_totals
df["cum"] = df.groupby(["cashback", "diff"])["perc"].cumsum()

# Only values within +/- 1000 are plotted.
within_range = df[df["sum"].abs() <= 1000]

alt.Chart(within_range).mark_line().encode(
    x="sum:Q", y="cum:Q", color="cashback:N", column="diff:N"
)

In [45]:
def percentile(n):
    """Build a function that returns the ``n``-th percentile of its input.

    The generated function is renamed to ``percentile_<n>`` so that functions
    built for different values of ``n`` carry distinct names.

    Args:
        n: Percentile to compute, passed on to ``np.percentile``.

    Returns:
        A one-argument callable named ``percentile_<n>``.
    """

    def _nth_percentile(values):
        return np.percentile(values, n)

    generated_name = "percentile_%s" % n
    _nth_percentile.__name__ = generated_name
    return _nth_percentile


# 25th / 50th / 75th percentile of the `sum` column per diff, type and
# cashback flag; only rows with type "Personal" are charted.
df = pd.read_csv("balances.csv")

# Rows carrying this exact first_cashback timestamp are flagged 0, all other
# rows 1.
is_placeholder = df["first_cashback"].astype(str) == "2020-08-24 02:00:00"
df["cashback"] = (~is_placeholder).astype("int64")

quartile_funcs = [percentile(q) for q in (25, 50, 75)]
df = df.groupby(["diff", "type", "cashback"])["sum"].agg(quartile_funcs).reset_index()

# Long format: one row per group and percentile.
df = pd.melt(df, id_vars=["diff", "type", "cashback"])

personal_only = df[df["type"] == "Personal"]

alt.Chart(personal_only).mark_line().encode(
    x="diff:N", y="value:Q", color="cashback:N", column="variable:N"
).properties(width=300, height=300)

## previous users.

In [1]:
# Per ESP/ITA user: creation time of the latest card transaction with
# merchant_name 'Amazon Prime' that happened before 2020-08-24.
# NOTE(review): this cell only defines the query; nothing in view executes it.
# The following cells read previous_recipients.csv, presumably an export of
# this result - TODO confirm.
query_raw = """

select zu.user_id, 
		max(zct.created)
from dbt.zrh_card_transactions as zct 
inner join dbt.zrh_users as zu
on zu.user_created = zct.user_created and country_tnc_legal in ('ESP','ITA')
where merchant_name = 'Amazon Prime' and created < '2020-08-24'
group by 1 
"""

In [11]:
# Per month: percentage of cashback recipients that also have a row in
# previous_recipients.csv (non-null `max` after the left join).
df = pd.read_csv("cashbacks.csv", compression="gzip")
prev = pd.read_csv("previous_recipients.csv")

df = pd.merge(df, prev, how="left", on=["user_id"])

# 1 when the user matched a row in previous_recipients.csv, else 0.
df["previous"] = df["max"].notnull().astype("int64")

# Distinct users per month and flag, then the flag's share within the month.
df = df.groupby(["month", "previous"])["user_id"].nunique().reset_index()
month_totals = df.groupby(["month"])["user_id"].transform("sum")
df["perc"] = 100 * df["user_id"] / month_totals
df = df[df["previous"] == 1]

month_axis = alt.X("month:T", axis=alt.Axis(title="Month"))
share_axis = alt.Y(
    "perc:Q",
    axis=alt.Axis(title="% of recipients in month that had previous prime"),
)

alt.Chart(df).mark_line().encode(x=month_axis, y=share_axis).properties(
    width=300,
    height=300,
    title="% of cashback recipients with previous Prime membership linked to n26",
)

In [13]:
# Across all months: percentage of cashback recipients that also have a row
# in previous_recipients.csv (non-null `max` after the left join).
df = pd.read_csv("cashbacks.csv", compression="gzip")
prev = pd.read_csv("previous_recipients.csv")

df = pd.merge(df, prev, how="left", on=["user_id"])

# 1 when the user matched a row in previous_recipients.csv, else 0.
df["previous"] = df["max"].notnull().astype("int64")

# Distinct users per flag value, rescaled to a percentage of all recipients.
df = df.groupby(["previous"])["user_id"].nunique().reset_index()
recipient_total = df["user_id"].sum()
df["perc"] = 100 * df["user_id"] / recipient_total
df = df[df["previous"] == 1]

df.head()

Unnamed: 0,previous,user_id,perc
1,1,578,40.083218
