title: Unlimited ATM for Metal
author: Fabio Schmidt-Fischbach   
date: 2020-08-31   
region: EU   
link: https://docs.google.com/presentation/d/1WDCn8qTi5q9mTR6qL13DGyb42Ju_66tV2auguQrQSXE/edit?usp=sharing    
summary: What % is currently using more than 8 ATMs per month? What % decrease of ATMs per FAU did we see there? Between 8 and 13% of Metal MAUs (depending on market) used more than 5 ATM withdrawals in July. 20% of all ATM withdrawals are generated by users with more than 10 withdrawals per month (roughly 5% of users). Within the Fair Use allowance, users generate 50% of all ATM withdrawals. In July, unlimited ATM for Metal would have meant paying the cost for the remaining 50% (60k withdrawals). The introduction of Fair Use in our core markets reduced the # of ATMs per FAU by 20-50%. We need to expect a behavioral change once we remove Fair Use, so the overall impact will be > 60k withdrawals per month.
tags: premium, memberships, atm, metal, fair use, tiering    

In [2]:
import os
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# SQL text for the historic extract. For every cohort month (start_time) between
# 2016-01-01 and 2018-03-01 it returns one row per user and product_id with `atms`,
# the number of distinct transactions in that month matching mcc_group = 1 and
# type = 'PT'. Users are kept only if they have an activity row of type '1_tx_35'
# and a product subscription that both cover the month's end_time.
# NOTE(review): the string is only assigned here; no visible cell executes it.
# atm_old.csv, read by the cells below, is presumably an export of this query — confirm.
query = """

with sample as ( 

select user_id, user_created, kyc_first_completed, country_tnc_legal, tnc_country_group
from dbt.zrh_users 

),
counts as ( 

select sample.*, zup.product_id,
		start_time, 
		count(distinct zt.id) as atms 
from dwh_cohort_months as dwh 
inner join dbt.zrh_user_activity_txn as act 
	on act.activity_type = '1_tx_35'
	and end_time between activity_start and activity_end 
left join dwh_adorable_transaction as zt 
	on zt.user_created = act.user_created 
	and date_trunc('month',zt.created) = start_time
	and mcc_group = 1 
	and type = 'PT'
inner join sample on sample.user_created  = act.user_created 
inner join dbt.zrh_user_product as zup on zup.user_created = act.user_created and end_time between subscription_valid_from and subscription_valid_until 
where start_time between '2016-01-01' and '2018-03-01'
group by 1,2,3,4,5,6,7
)

select * from counts 	

"""

## Distribution of ATM withdrawals per MAU in the historical data

In [24]:
# Percentiles (p50/p70/p90/p95/p99) of the per-user monthly ATM count, computed
# per month and market. "lower" interpolation returns an observed value instead
# of an interpolated one.
df = (
    pd.read_csv("atm_old.csv")
    .groupby(["start_time", "tnc_country_group"])["atms"]
    .quantile([0.5, 0.70, 0.90, 0.95, 0.99], "lower")
    .reset_index()  # the quantile level comes back as column "level_2"
    .assign(
        Percentile=lambda q: (q["level_2"] * 100).astype(int),
        start_time=lambda q: pd.to_datetime(q["start_time"])
        .dt.to_period("M")
        .astype(str),
    )
)

# Only the DEU market is plotted here.
deu_rows = df[df["tnc_country_group"].str.contains("DEU", na=False)]

alt.Chart(deu_rows).mark_line().encode(
    x=alt.X("start_time:N", axis=alt.Axis(title="Month")),
    y=alt.Y("atms:Q", axis=alt.Axis(title="# monthly ATMs")),
    color="Percentile:N",
).properties(
    width=400,
    height=400,
    title="Percentile of # monthly ATM per FAU across time for DEU ",
)

In [112]:
# Share of users per month and market with more than 5 ATM withdrawals:
# flag each row (1 = above 5, 0 = otherwise) and average the flag.
df = (
    pd.read_csv("atm_old.csv")
    .assign(high=lambda raw: (raw["atms"] > 5).astype("int64"))
    .groupby(["start_time", "tnc_country_group"])["high"]
    .mean()
    .reset_index()
)

# Only the DEU market is plotted here.
deu_rows = df[df["tnc_country_group"].str.contains("DEU", na=False)]

alt.Chart(deu_rows).mark_line().encode(
    x=alt.X("start_time:N", axis=alt.Axis(title="Month")),
    y=alt.Y(
        "high:Q", axis=alt.Axis(title="% of FAUs above 5 ATMs per month", format="%")
    ),
).properties(
    width=400, height=400, title="% of FAUs with more than 5 ATMs per month in DEU"
)

In [113]:
# Average ATM cost per user, per month and market. Each withdrawal is costed
# at 2, and rows with 5 or more withdrawals are set to a flat 10.
df = (
    pd.read_csv("atm_old.csv")
    .assign(atm_costs=lambda raw: (raw["atms"] * 2).mask(raw["atms"] >= 5, 10))
    .groupby(["start_time", "tnc_country_group"])["atm_costs"]
    .mean()
    .reset_index()
)

# Only the DEU market is plotted here.
deu_rows = df[df["tnc_country_group"].str.contains("DEU", na=False)]

alt.Chart(deu_rows).mark_line().encode(
    x=alt.X("start_time:N", axis=alt.Axis(title="Month")),
    y=alt.Y("atm_costs:Q", axis=alt.Axis(title="Avg. ATM costs per FAU")),
).properties(
    width=400, height=400, title="Avg. ATM costs per FAU in DEU"
)

In [31]:
# Mean number of ATM withdrawals per user, per month and market; missing
# counts are treated as zero withdrawals.
df = (
    pd.read_csv("atm_old.csv")
    .assign(atms=lambda raw: raw["atms"].fillna(0))
    .groupby(["start_time", "tnc_country_group"])["atms"]
    .mean()
    .reset_index()
)

# Only the DEU market is plotted here.
deu_rows = df[df["tnc_country_group"].str.contains("DEU", na=False)]

alt.Chart(deu_rows).mark_line().encode(
    x=alt.X("start_time:N", axis=alt.Axis(title="Month")),
    y=alt.Y("atms:Q", axis=alt.Axis(title="Avg. # of ATMs per FAU")),
).properties(
    width=600, height=400, title="Avg. # of ATMs per FAU in DEU"
)

In [51]:
# Percentiles (p50/p70/p90/p95/p99) of the per-user monthly ATM count for every
# market except DEU, drawn as one facet per market.
df = pd.read_csv("atm_old.csv")

df["tnc_country_group"] = df["tnc_country_group"].str.strip()
# Exclude the NEuro, Other and GrE groups from the comparison.
df = df.loc[~df["tnc_country_group"].isin(["NEuro", "Other", "GrE"]), :]

df = (
    df.groupby(["start_time", "tnc_country_group"])["atms"]
    .quantile([0.5, 0.70, 0.90, 0.95, 0.99], "lower")
    .reset_index()  # the quantile level comes back as column "level_2"
)

df["Percentile"] = (df["level_2"] * 100).astype(int)
df["start_time"] = pd.to_datetime(df["start_time"]).dt.to_period("M").astype(str)

# DEU is excluded here, so the title must not claim the chart is "for DEU"
# (the original title was copied from the DEU-only chart).
alt.Chart(
    df.loc[~df["tnc_country_group"].str.contains("DEU", na=True), :]
).mark_line().encode(
    x=alt.X("start_time:N", axis=alt.Axis(title="Month")),
    y=alt.Y("atms:Q", axis=alt.Axis(title="# monthly ATMs")),
    color="Percentile:N",
).properties(
    width=300,
    height=200,
    title="Percentile of # monthly ATM per FAU across time (markets excl. DEU)",
).facet(
    facet="tnc_country_group", columns=2
)

In [52]:
# Share of users per month with more than 5 ATM withdrawals for every market
# except DEU, drawn as one facet per market.
df = pd.read_csv("atm_old.csv")

df["tnc_country_group"] = df["tnc_country_group"].str.strip()
# Exclude the NEuro, Other and GrE groups. The explicit copy ensures the column
# added below is written to an independent frame, not to a slice of the raw data.
df = df.loc[~df["tnc_country_group"].isin(["NEuro", "Other", "GrE"]), :].copy()

# 1 when the user made more than 5 withdrawals in the month, else 0.
df["high"] = (df["atms"] > 5).astype("int64")

df = df.groupby(["start_time", "tnc_country_group"])["high"].agg("mean").reset_index()

# DEU is excluded here, so the title must not say "in DEU"
# (the original title was copied from the DEU-only chart).
alt.Chart(
    df.loc[~df["tnc_country_group"].str.contains("DEU", na=True), :]
).mark_line().encode(
    x=alt.X("start_time:N", axis=alt.Axis(title="Month")),
    y=alt.Y(
        "high:Q", axis=alt.Axis(title="% of FAUs above 5 ATMs per month", format="%")
    ),
).properties(
    width=300,
    height=200,
    title="% of FAUs with more than 5 ATMs per month (markets excl. DEU)",
).facet(
    facet="tnc_country_group", columns=2
)

In [56]:
# Mean number of ATM withdrawals per user and month for every market except
# DEU, drawn as one facet per market.
df = pd.read_csv("atm_old.csv")

df["tnc_country_group"] = df["tnc_country_group"].str.strip()
# Exclude the NEuro, Other and GrE groups. The explicit copy ensures the column
# overwritten below belongs to an independent frame, not to a slice of the raw data.
df = df.loc[~df["tnc_country_group"].isin(["NEuro", "Other", "GrE"]), :].copy()

# Missing counts are treated as zero withdrawals.
df["atms"] = df["atms"].fillna(0)

df = df.groupby(["start_time", "tnc_country_group"])["atms"].agg("mean").reset_index()

# DEU is excluded here, so the title must not say "in DEU"
# (the original title was copied from the DEU-only chart).
alt.Chart(
    df.loc[~df["tnc_country_group"].str.contains("DEU", na=True), :]
).mark_line().encode(
    x=alt.X("start_time:N", axis=alt.Axis(title="Month")),
    y=alt.Y("atms:Q", axis=alt.Axis(title="Avg. # of ATMs per FAU")),
).properties(
    width=300, height=200, title="Avg. # of ATMs per FAU (markets excl. DEU)"
).facet(
    facet="tnc_country_group", columns=2
)

## Current data 

In [None]:
# SQL text with the same shape as the historic query above: per cohort month
# (start_time), user and product_id it counts distinct transactions matching
# mcc_group = 1 and type = 'PT' as `atms`. It differs in reading activity from
# dwh_user_activity with activity_type = '5'.
# NOTE(review): the string is only assigned here; no visible cell executes it.
# NOTE(review): this looks stale relative to atm_new.csv — the date filter ends at
# 2017-12-01 and no `dom_atm` column is selected, while the cells below filter on
# 2020-07-01 and read `dom_atm`. Confirm which query actually produced the CSV.
query = """


with sample as ( 

select user_id, user_created, kyc_first_completed, country_tnc_legal, tnc_country_group
from dbt.zrh_users 

),
counts as ( 

select sample.*, zup.product_id,
		start_time, 
		count(distinct zt.id) as atms 
from dwh_cohort_months as dwh 
inner join dwh_user_activity as act 
	on act.activity_type = '5'
	and end_time between activity_start and activity_end 
left join dwh_adorable_transaction as zt 
	on zt.user_created = act.user_created 
	and date_trunc('month',zt.created) = start_time
	and mcc_group = 1 
	and type = 'PT'
inner join sample on sample.user_created  = act.user_created 
inner join dbt.zrh_user_product as zup on zup.user_created = act.user_created and end_time between subscription_valid_from and subscription_valid_until 
where start_time between '2016-01-01' and '2017-12-01'
group by 1,2,3,4,5,6,7
)

select * from counts 	


"""

In [150]:
# Cumulative distribution of users by number of ATM withdrawals in July 2020,
# split into domestic and international withdrawals, one facet per market.
df = pd.read_csv("atm_new.csv")

df["tnc_country_group"] = df["tnc_country_group"].str.strip()

# Keep July 2020 rows and exclude the NEuro, Other and GrE groups. The explicit
# copy ensures the columns added below are written to an independent frame.
in_scope = ~df["tnc_country_group"].isin(["NEuro", "Other", "GrE"])
is_july = pd.to_datetime(df["start_time"]) == pd.to_datetime("2020-07-01")
df = df.loc[in_scope & is_july, :].copy()

# `dom_atm` is used as the domestic count; everything else in `atms` is
# treated as international.
df["domestic"] = df["dom_atm"]
df["international"] = df["atms"] - df["domestic"]

# One row per user and market. Several columns must be selected with a list:
# the tuple form ["a", "b"] is deprecated and rejected by newer pandas.
df = (
    df.groupby(["user_id", "tnc_country_group"])[["domestic", "international"]]
    .agg("max")
    .reset_index()
    .fillna(0)
)

# Long format: `variable` is domestic/international, `value` is the count.
df = pd.melt(df, id_vars=["user_id", "tnc_country_group"])

# Number of distinct users at each withdrawal count.
df = (
    df.groupby(["tnc_country_group", "variable", "value"])["user_id"]
    .agg("nunique")
    .reset_index()
)

# Share of users at each count, then the running total within market and
# variable. The groupby above leaves rows sorted by `value`, so the cumsum
# accumulates in ascending withdrawal count.
df["perc"] = (
    100
    * df["user_id"]
    / df.groupby(["tnc_country_group", "variable"])["user_id"].transform("sum")
)
df["cum"] = df.groupby(["tnc_country_group", "variable"])["perc"].cumsum()


alt.Chart(df.loc[df["value"] < 10, :]).mark_line().encode(
    x=alt.X("value:Q", axis=alt.Axis(title="Number of ATMs per month")),
    y=alt.Y("cum:Q", axis=alt.Axis(title="Percentile")),
    color="variable:N",
).properties(width=200, height=200, title="ATM withdrawals across markets").facet(
    facet="tnc_country_group", columns=3
)

  # This is added back by InteractiveShellApp.init_path()


In [154]:
# Rows of the cumulative distribution at exactly 8 withdrawals (first 10).
df[df["value"].eq(8)].head(10)

Unnamed: 0,tnc_country_group,variable,value,user_id,perc,cum
8,AUT,domestic,8,22,0.987876,96.946565
32,AUT,international,8,4,0.179614,99.416255
47,DEU,domestic,8,168,1.181601,96.996765
77,DEU,international,8,20,0.140667,99.7046
103,ESP,domestic,8,16,0.694746,97.524967
127,ESP,international,8,3,0.130265,98.957881
150,FRA,domestic,8,492,1.343271,95.953805
189,FRA,international,8,40,0.109209,99.432113
228,ITA,domestic,8,55,1.236233,93.459204
264,ITA,international,8,6,0.134862,99.415599


In [137]:
# Share of users per market with more than 8 domestic ATM withdrawals in
# July 2020: flag each row, collapse to one flag per user, then average.
df = (
    pd.read_csv("atm_new.csv")
    .assign(tnc_country_group=lambda raw: raw["tnc_country_group"].str.strip())
    .loc[lambda t: ~t["tnc_country_group"].isin(["NEuro", "Other", "GrE"])]
    .loc[lambda t: pd.to_datetime(t["start_time"]) == pd.to_datetime("2020-07-01")]
    .assign(high=lambda t: (t["dom_atm"] > 8).astype("int64"))
    .groupby(["tnc_country_group", "user_id"])["high"]
    .max()
    .reset_index()
    .groupby("tnc_country_group")["high"]
    .mean()
    .reset_index()
)

share_axis = alt.Axis(title="% of MAUs above 8 ATMs per month", format="%")

alt.Chart(df).mark_bar().encode(
    x=alt.X("tnc_country_group:N", axis=alt.Axis(title="Market")),
    y=alt.Y("high:Q", axis=share_axis),
).properties(
    width=400,
    height=400,
    title="% of Metal MAUs with more than 8 domestic ATMs in July",
)

In [138]:
# Number of users per market with more than 8 domestic ATM withdrawals in
# July 2020: flag each row, collapse to one flag per user, then sum.
df = (
    pd.read_csv("atm_new.csv")
    .assign(tnc_country_group=lambda raw: raw["tnc_country_group"].str.strip())
    .loc[lambda t: ~t["tnc_country_group"].isin(["NEuro", "Other", "GrE"])]
    .loc[lambda t: pd.to_datetime(t["start_time"]) == pd.to_datetime("2020-07-01")]
    .assign(high=lambda t: (t["dom_atm"] > 8).astype("int64"))
    .groupby(["tnc_country_group", "user_id"])["high"]
    .max()
    .reset_index()
    .groupby("tnc_country_group")["high"]
    .sum()
    .reset_index()
)

count_axis = alt.Axis(title="# of MAUs above 8 domestic ATMs")

alt.Chart(df).mark_bar().encode(
    x=alt.X("tnc_country_group:N", axis=alt.Axis(title="Market")),
    y=alt.Y("high:Q", axis=count_axis),
).properties(
    width=400,
    height=400,
    title="# of Metal MAUs with more than 8 domestic ATMs in July",
)

In [125]:
# Heatmap of domestic-withdrawal percentiles (p50/p70/p90/p95/p99) per market
# for July 2020, with the percentile value printed in each tile.
df = (
    pd.read_csv("atm_new.csv")
    .assign(tnc_country_group=lambda raw: raw["tnc_country_group"].str.strip())
    .loc[lambda t: ~t["tnc_country_group"].isin(["NEuro", "Other", "GrE"])]
    .loc[lambda t: pd.to_datetime(t["start_time"]) == pd.to_datetime("2020-07-01")]
    .groupby(["tnc_country_group"])["dom_atm"]
    .quantile([0.5, 0.70, 0.90, 0.95, 0.99], "lower")
    .reset_index()  # the quantile level comes back as column "level_1"
    .assign(Percentile=lambda q: (q["level_1"] * 100).astype(int))
)

# Shared grid: percentile on x, market on y, tiles touching each other.
tile_scale_x = alt.Scale(paddingInner=0)
tile_scale_y = alt.Scale(paddingInner=0)
base = (
    alt.Chart(df)
    .encode(
        alt.X("Percentile:N", scale=tile_scale_x),
        alt.Y("tnc_country_group:O", scale=tile_scale_y),
    )
    .properties(width=500, height=500)
)

# Coloured tiles.
tile_colour = alt.Color(
    "dom_atm:Q",
    scale=alt.Scale(scheme="viridis"),
    legend=alt.Legend(direction="horizontal"),
)
heatmap = base.mark_rect().encode(color=tile_colour)

# Numeric label on top of each tile.
text = base.mark_text(baseline="middle").encode(text="dom_atm:Q")

# Layer the labels over the tiles.
heatmap + text

In [133]:
# Cumulative share of all domestic withdrawals in July 2020 contributed by
# users making up to x withdrawals, per market.
df = (
    pd.read_csv("atm_new.csv")
    .assign(tnc_country_group=lambda raw: raw["tnc_country_group"].str.strip())
    .loc[lambda t: ~t["tnc_country_group"].isin(["NEuro", "Other", "GrE"])]
    .loc[lambda t: pd.to_datetime(t["start_time"]) == pd.to_datetime("2020-07-01")]
    # distinct users at each (withdrawal count, market) combination
    .groupby(["dom_atm", "tnc_country_group"])["user_id"]
    .nunique()
    .reset_index()
    # total withdrawals generated at that count = count * number of users
    .assign(atms=lambda t: t["dom_atm"] * t["user_id"])
    .assign(
        perc=lambda t: 100
        * t["atms"]
        / t.groupby(["tnc_country_group"])["atms"].transform("sum")
    )
    # rows are sorted by dom_atm, so this accumulates in ascending count
    .assign(cum=lambda t: t.groupby(["tnc_country_group"])["perc"].cumsum())
)

contribution_title = (
    "How much do users that withdraw x times contribute to total ATM withdrawals?"
)

alt.Chart(df).mark_line().encode(
    x=alt.X("dom_atm:Q", axis=alt.Axis(title="Number of domestic ATMs per month")),
    y=alt.Y("cum:Q", axis=alt.Axis(title="% of all domestic ATMs")),
    color="tnc_country_group:N",
).properties(
    width=400,
    height=400,
    title=contribution_title,
)

In [135]:
# Per-market contribution rows at exactly 8 domestic withdrawals (first 20).
df[df["dom_atm"].eq(8)].head(20)

Unnamed: 0,dom_atm,tnc_country_group,user_id,atms,perc,cum
40,8,AUT,22,176,5.736636,71.284224
41,8,DEU,168,1344,5.312673,79.365958
42,8,ESP,16,128,4.776119,71.044776
43,8,FRA,492,3936,5.826795,72.031088
44,8,ITA,55,440,4.936056,54.128337


# Data for Fernando

In [15]:
# Export for Fernando: number of distinct users at each withdrawal count, per
# legal T&C country, month and withdrawal type (domestic / international).
df = (
    pd.read_csv("atm_new.csv")
    .fillna(0)
    .assign(
        domestic=lambda raw: raw["dom_atm"],
        international=lambda raw: raw["atms"] - raw["dom_atm"],
    )
    # long format: `variable` is the withdrawal type, `value` the count
    .melt(
        id_vars=["user_id", "country_tnc_legal", "start_time"],
        value_vars=["domestic", "international"],
    )
    .reset_index()
    .groupby(["country_tnc_legal", "start_time", "variable", "value"])["user_id"]
    .nunique()
    .reset_index()
)

# Written with the default index column, as before.
df.to_csv("fernando.csv")

In [16]:
# Preview the first 20 rows of the exported table.
df.iloc[:20]

Unnamed: 0,country_tnc_legal,start_time,variable,value,user_id
0,AUT,2020-06-01 00:00:00,domestic,0,1298
1,AUT,2020-06-01 00:00:00,domestic,1,285
2,AUT,2020-06-01 00:00:00,domestic,2,165
3,AUT,2020-06-01 00:00:00,domestic,3,104
4,AUT,2020-06-01 00:00:00,domestic,4,73
5,AUT,2020-06-01 00:00:00,domestic,5,47
6,AUT,2020-06-01 00:00:00,domestic,6,28
7,AUT,2020-06-01 00:00:00,domestic,7,24
8,AUT,2020-06-01 00:00:00,domestic,8,14
9,AUT,2020-06-01 00:00:00,domestic,9,11
