In [919]:
#%load_ext lab_black

In [920]:
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas_gbq
import pandas as pd
import numpy as np
import os
import warnings

## notebook-wide settings
# Show every column when a frame is displayed.
pd.set_option("display.max_columns", None)
# NOTE(review): this silences all warnings, including pandas' chained-assignment warnings.
warnings.filterwarnings("ignore")

## BigQuery credentials and client
# NOTE(review): machine-specific absolute path to a service-account key file.
service_account_key_path = "/Users/miguelcouto/Downloads/zattoo-dataeng-e5f45785174f.json"
credentials = service_account.Credentials.from_service_account_file(
    service_account_key_path
)

project_id = "zattoo-dataeng"
client = bigquery.Client(project=project_id, credentials=credentials)

In [921]:
## Query text for the transactions to process.
## - source: b2c_middleware.middlelayer_transactions, restricted to app_shop_id = 'datatrans'
## - window: transaction_date within February 2022, written as a half-open interval
##   [2022-02-01, 2022-03-01) so that timestamps with fractional seconds in the last
##   second of the month are not dropped
## - refunds are excluded in SQL, so refund-specific steps later on match no rows
## - country_name: anything other than Germany/Austria is reported as 'Switzerland'
## - product_group_finance: zuya_account_permission, reached through the chain of
##   LEFT JOINs product -> subscriptionproduct -> offer -> service, each joined on the
##   rows whose inserted_at_date equals DATE(transaction_date)
sql_calcs = """
select mt.transaction_id,
       mt.zuid,
       mt.payment_method,
       mt.sku,
       mt.type_of_transaction,
       case
           when mt.country_name = 'Germany' then 'Germany'
           when mt.country_name = 'Austria' then 'Austria'
           else 'Switzerland' end as country_name,
       mt.new_booking_net_CHF,
       mt.renewal_booking_net_CHF,
       mt.new_booking_net_EUR,
       mt.renewal_booking_net_EUR,
       mt.transaction_date,
       mt.term_start,
       mt.term_end,
       product_service_view.zuya_account_permission as product_group_finance,
       mt.vat_eur,
       mt.vat_chf
from b2c_middleware.middlelayer_transactions mt
         LEFT JOIN b2c_middleware_import.product_product_view AS product_product_view
                   ON mt.sku = product_product_view.sku
                       AND DATE(mt.transaction_date) = product_product_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_subscriptionproduct_view AS product_subscriptionproduct_view
                   ON product_product_view.id = product_subscriptionproduct_view.id
                       AND
                      product_product_view.inserted_at_date = product_subscriptionproduct_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_offer_view AS product_offer_view
                   ON product_subscriptionproduct_view.offer_id = product_offer_view.id
                       AND product_subscriptionproduct_view.inserted_at_date = product_offer_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_service_view AS product_service_view
                   ON product_offer_view.service_id = product_service_view.id
                       AND product_offer_view.inserted_at_date = product_service_view.inserted_at_date
where true
  and mt.app_shop_id = 'datatrans'
  and mt.transaction_date >= '2022-02-01 00:00:00'
  and mt.transaction_date < '2022-03-01 00:00:00'
-- NEW LINE! added on 2022-04-27
  and mt.type_of_transaction != 'refund'
           """

In [922]:
## transaction types that go through the revenue calculation below
type_trans_lst = ["new_sale", "renewal", "refund"]

## load the query result into a dataframe
# NOTE(review): no `credentials=` argument is passed, so the service-account
# `credentials` object built earlier is not used by this call — confirm.
df = pandas_gbq.read_gbq(
    sql_calcs,
    project_id=project_id,
    progress_bar_type=None,
)

In [923]:
## rows whose transaction type is in type_trans_lst, as an independent copy of df
mt_df = df.loc[df["type_of_transaction"].isin(type_trans_lst)].copy()

## turn the term boundaries into timezone-naive datetimes
parse_dates = ["term_start", "term_end"]
for term_col in parse_dates:
    as_datetime = pd.to_datetime(mt_df[term_col])
    mt_df[term_col] = as_datetime.dt.tz_convert(None)

In [924]:
## total booking per currency = new booking + renewal booking
mt_df["total_booking_net_CHF"] = mt_df["new_booking_net_CHF"].add(
    mt_df["renewal_booking_net_CHF"]
)
mt_df["total_booking_net_EUR"] = mt_df["new_booking_net_EUR"].add(
    mt_df["renewal_booking_net_EUR"]
)

In [925]:
## the per-type booking columns are no longer needed once the totals exist
mt_df = mt_df.drop(
    columns=[
        "new_booking_net_EUR",
        "new_booking_net_CHF",
        "renewal_booking_net_CHF",
        "renewal_booking_net_EUR",
    ]
)

In [926]:
## number of calendar months touched by the term (start and end month both counted)
year_gap = mt_df["term_end"].dt.year - mt_df["term_start"].dt.year
month_gap = mt_df["term_end"].dt.month - mt_df["term_start"].dt.month
mt_df["product_term_length_months"] = year_gap * 12 + month_gap + 1

In [927]:
## negative month counts (term_end in an earlier month than term_start) are clamped to 0
# .loc writes to mt_df itself; the chained form mt_df[col][mask] = ... can end up
# writing to a temporary copy.
mt_df.loc[mt_df["product_term_length_months"] < 0, "product_term_length_months"] = 0

In [928]:
## terms of at most 30 days that end on the 1st of a month were counted as 2 months; treat them as 1
shorter_subs = (mt_df["term_end"] - mt_df["term_start"]).dt.days <= 30
counted_as_two_months = mt_df["product_term_length_months"] == 2
ends_on_first_of_month = mt_df["term_end"].dt.day == 1

# Index labels of the rows to correct. All three conditions are combined into one
# mask instead of indexing an already-filtered frame with a full-length mask.
shorter_subs_replacer = mt_df.index[
    shorter_subs & counted_as_two_months & ends_on_first_of_month
].to_list()
mt_df.loc[shorter_subs_replacer, "product_term_length_months"] = 1

In [929]:
## expand to one row per covered month: every row is repeated product_term_length_months
## times (rows with 0 months disappear); the repeated rows keep their original index label
months_per_row = mt_df["product_term_length_months"]
repeated_index = mt_df.index.repeat(months_per_row)
mt_df = mt_df.reindex(repeated_index)

In [930]:
## running 1-based counter of the rows within each transaction_id
rows_by_transaction = mt_df.groupby(["transaction_id"])
mt_df["revenue_month_number"] = rows_by_transaction.cumcount().add(1)

In [931]:
## add revenue_month_date
# Start from the first day of the month of term_start; refund rows use the month of
# transaction_date instead. np.where selects by position, so it does not depend on
# the index labels, which still contain duplicates after the row expansion.
term_start_month = mt_df["term_start"].to_numpy().astype("datetime64[M]")
transaction_month = mt_df["transaction_date"].to_numpy().astype("datetime64[M]")
is_refund = (mt_df["type_of_transaction"] == "refund").to_numpy()
mt_df["revenue_month_date"] = np.where(is_refund, transaction_month, term_start_month)

# Move each row to the first day of its own revenue month: MonthEnd(n) lands on the
# end of the n-th month counted from the starting month, MonthBegin(-1) rolls back
# to the 1st of that month.
mt_df["revenue_month_date"] = mt_df.apply(
    lambda x: x["revenue_month_date"]
              + pd.offsets.MonthEnd(x["revenue_month_number"])
              + pd.offsets.MonthBegin(-1),
    axis=1,
)

## product_group_finance is delivered by the query (product_service_view.zuya_account_permission),
## so it is no longer initialised here

## days covered in each revenue month: the full length of that month by default
mt_df["product_term_length"] = mt_df["revenue_month_date"].apply(
    lambda t: pd.Period(t, freq="S").days_in_month
)

## rows whose revenue month begins before term_start only cover the days from
## term_start to the end of that month (term_start's day included)
starts_after_month_begin = (
    mt_df["term_start"] > mt_df["revenue_month_date"]
).to_numpy()
days_left_in_start_month = (
    (mt_df["term_start"].dt.daysinmonth - mt_df["term_start"].dt.day) + 1
).to_numpy()
mt_df["product_term_length"] = np.where(
    starts_after_month_begin,
    days_left_in_start_month,
    mt_df["product_term_length"].to_numpy(),
)

In [932]:
## replace the repeated index labels left by the row expansion with a fresh 0..n-1 index
mt_df = mt_df.reset_index(drop=True)

In [933]:
## set active_sub_month_end = 1 by default
mt_df["active_sub_month_end"] = 1

## positions of the last row of each transaction_id group
# np.unique on the reversed id array returns the first occurrence of each id in the
# reversed order; subtracting from len - 1 converts that into the last position in
# the original order.
last_idxs = (
    len(mt_df)
    - np.unique(mt_df.transaction_id.values[::-1], return_index=True)[1]
    - 1
)

## the last row of every transaction is flagged 0
# Positional write on the frame itself instead of chained indexing on a column.
active_col_pos = mt_df.columns.get_loc("active_sub_month_end")
mt_df.iloc[last_idxs, active_col_pos] = 0

## transactions that consist of a single revenue month keep the flag 1
single_month_rows = (mt_df["revenue_month_number"] <= 1) & (
    mt_df["product_term_length_months"] <= 1
)
mt_df.loc[single_month_rows, "active_sub_month_end"] = 1

## mark all refund transactions as -1
# (the original cell contained this statement twice; once is sufficient)
mt_df.loc[mt_df["type_of_transaction"] == "refund", "active_sub_month_end"] = -1

In [934]:
## active_sub_content starts as a copy of active_sub_month_end and is then set to 0
## on the last two rows of every transaction_id
mt_df["active_sub_content"] = mt_df["active_sub_month_end"]

last_two_rows = mt_df.groupby("transaction_id")["active_sub_content"].tail(2).index
mt_df.loc[last_two_rows, "active_sub_content"] = 0

In [935]:
## last row of each transaction_id: days between the start of that revenue month and term_end
# Written through the frame's own .iloc (row positions + column position) rather than
# through chained indexing on the column; the values are passed as a plain array so
# they are assigned strictly by position.
term_length_col_pos = mt_df.columns.get_loc("product_term_length")
mt_df.iloc[last_idxs, term_length_col_pos] = (
    mt_df["term_end"].iloc[last_idxs]
    - mt_df["revenue_month_date"].iloc[last_idxs]
).dt.days.to_numpy()

In [936]:
## TEMP FIX to tackle dynamic term_end for refunds: negative day counts become 0
# .loc writes to mt_df directly instead of going through chained indexing.
mt_df.loc[mt_df["product_term_length"] < 0, "product_term_length"] = 0

In [937]:
## total_days: sum of product_term_length over all rows of the same transaction_id
term_length_by_transaction = mt_df.groupby("transaction_id")["product_term_length"]
mt_df["total_days"] = term_length_by_transaction.transform("sum")

In [963]:
## get 12mo subs that didn't stay for 12 months for posterior treatment of exception
# "max" (string) selects the built-in groupby reduction; passing the Python builtin
# `max` is deprecated in recent pandas versions.
mt_df["max_revenue_month_number"] = mt_df.groupby(["transaction_id"])[
    "revenue_month_number"
].transform("max")

# Displayed for inspection: 12-month Swiss HiQ SKUs with fewer than 13 revenue rows.
mt_df[
    (mt_df.sku.str.contains("zattoo_web_hiq_swiss_12mo"))
    & (mt_df.max_revenue_month_number < 13)
]

Unnamed: 0,transaction_id,zuid,payment_method,sku,type_of_transaction,country_name,transaction_date,term_start,term_end,product_group_finance,vat_eur,vat_chf,total_booking_net_CHF,total_booking_net_EUR,product_term_length_months,revenue_month_number,revenue_month_date,product_term_length,active_sub_month_end,active_sub_content,total_days,max_revenue_month_number
2581,35774006,24277,credit_card,zattoo_web_hiq_swiss_12mo,new_sale,Switzerland,2022-02-01 18:38:41,2022-03-04 17:06:59,2022-03-04 19:41:38,base_hiq,8.214342,8.579387,111.420613,106.679764,1,1,2022-03-01,3,1,0,3,1
27178,35777879,28714967,credit_card,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-02 04:02:12,2022-02-03 20:52:18,2022-05-10 13:37:59,base_hiq,8.214342,8.579387,111.420613,106.679764,4,1,2022-02-01,26,1,1,96,4
27179,35777879,28714967,credit_card,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-02 04:02:12,2022-02-03 20:52:18,2022-05-10 13:37:59,base_hiq,8.214342,8.579387,111.420613,106.679764,4,2,2022-03-01,31,1,1,96,4
27180,35777879,28714967,credit_card,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-02 04:02:12,2022-02-03 20:52:18,2022-05-10 13:37:59,base_hiq,8.214342,8.579387,111.420613,106.679764,4,3,2022-04-01,30,1,0,96,4
27181,35777879,28714967,credit_card,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-02 04:02:12,2022-02-03 20:52:18,2022-05-10 13:37:59,base_hiq,8.214342,8.579387,111.420613,106.679764,4,4,2022-05-01,9,0,0,96,4
27221,35777946,25960645,paypal,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-02 04:06:38,2022-02-02 06:02:28,2022-02-02 06:02:28,base_hiq,8.214342,8.579387,111.420613,106.679764,1,1,2022-02-01,1,1,0,1,1
27248,35832660,25475690,paypal,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-06 13:26:24,2022-02-07 14:35:59,2022-04-23 09:22:58,base_hiq,8.214342,8.579387,111.420613,106.679764,3,1,2022-02-01,22,1,1,75,3
27249,35832660,25475690,paypal,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-06 13:26:24,2022-02-07 14:35:59,2022-04-23 09:22:58,base_hiq,8.214342,8.579387,111.420613,106.679764,3,2,2022-03-01,31,1,0,75,3
27250,35832660,25475690,paypal,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-06 13:26:24,2022-02-07 14:35:59,2022-04-23 09:22:58,base_hiq,8.214342,8.579387,111.420613,106.679764,3,3,2022-04-01,22,0,0,75,3
27368,35777940,16261409,paypal,zattoo_web_hiq_swiss_12mo,renewal,Switzerland,2022-02-02 04:06:05,2022-02-02 14:29:32,2022-02-02 14:29:32,base_hiq,8.214342,8.579387,111.420613,106.679764,1,1,2022-02-01,1,1,0,1,1


In [889]:
## NEW LINE! added on 2022-04-27
## product_term_length fix for subscriptions < 31 days (1, 3, 7 days):
## rows whose sku contains 'day' get the whole term length in days
is_day_sku = mt_df["sku"].str.contains("day")
term_days = (mt_df["term_end"] - mt_df["term_start"]).dt.days
# .loc on the frame instead of chained indexing; the right-hand side is restricted
# to the same rows so that values and targets line up one to one.
mt_df.loc[is_day_sku, "product_term_length"] = term_days[is_day_sku]

In [890]:
## distinct transaction ids of 'day' skus that have rows beyond the first revenue month
sku_has_day = mt_df["sku"].str.contains("day")
past_first_month = mt_df["revenue_month_number"] > 1
trx_lst = mt_df.loc[sku_has_day & past_first_month, "transaction_id"].unique()

In [891]:
## total_days fix for subscriptions < 31 days (1, 3, 7 days)
# Applies to every row with product_term_length_months <= 1 whose product_term_length
# differs from total_days: total_days takes the value of product_term_length.
needs_total_days_fix = (mt_df["product_term_length"] != mt_df["total_days"]) & (
    mt_df["product_term_length_months"] <= 1
)
# Single .loc assignment on the frame instead of chained indexing on the column.
mt_df.loc[needs_total_days_fix, "total_days"] = mt_df.loc[
    needs_total_days_fix, "product_term_length"
]

In [892]:
## revenue per row: booking divided by total_days, multiplied by the days of this row
mt_df["total_revenue_net_EUR"] = (
    mt_df["total_booking_net_EUR"]
    .div(mt_df["total_days"])
    .mul(mt_df["product_term_length"])
)
mt_df["total_revenue_net_CHF"] = (
    mt_df["total_booking_net_CHF"]
    .div(mt_df["total_days"])
    .mul(mt_df["product_term_length"])
)

## keep the booking amount only on the first row of each transaction
booking_cols = ["total_booking_net_CHF", "total_booking_net_EUR"]
after_first_row = mt_df["revenue_month_number"] > 1
mt_df.loc[after_first_row, booking_cols] = 0.0

In [893]:
## keep VAT only on the first row of each transaction
## (total_booking_net_EUR is zeroed on the same rows as well, as before)
vat_cols = ["vat_chf", "vat_eur"]

beyond_first_row = mt_df["revenue_month_number"] > 1
mt_df.loc[beyond_first_row, vat_cols + ["total_booking_net_EUR"]] = 0.0

In [894]:
## rows whose transaction type is NOT in type_trans_lst (free trials and full discounts);
## they skip the revenue calculation and only get their term timestamps normalised
mt_df_nocalcs = df.loc[~df["type_of_transaction"].isin(type_trans_lst)].copy()

for term_col in parse_dates:
    as_datetime = pd.to_datetime(mt_df_nocalcs[term_col])
    mt_df_nocalcs[term_col] = as_datetime.dt.tz_convert(None)

In [895]:
## total booking per currency for the no-calculation rows = new booking + renewal booking
mt_df_nocalcs["total_booking_net_CHF"] = mt_df_nocalcs["new_booking_net_CHF"].add(
    mt_df_nocalcs["renewal_booking_net_CHF"]
)
mt_df_nocalcs["total_booking_net_EUR"] = mt_df_nocalcs["new_booking_net_EUR"].add(
    mt_df_nocalcs["renewal_booking_net_EUR"]
)

In [896]:
## VAT as a fraction of the net booking, then sales price = net booking * (1 + fraction)
# NOTE(review): rows with revenue_month_number > 1 had both vat and booking set to 0
# in the preceding cells, so the ratio there is 0/0 (presumably NaN) — confirm that
# this is the intended sales price for those rows.
vat_ratio_eur = mt_df["vat_eur"] / mt_df["total_booking_net_EUR"]
vat_ratio_chf = mt_df["vat_chf"] / mt_df["total_booking_net_CHF"]
mt_df["vat_eur_percentage"] = vat_ratio_eur.astype(float, errors="ignore")
mt_df["vat_chf_percentage"] = vat_ratio_chf.astype(float, errors="ignore")

eur_gross_factor = 1 + mt_df["vat_eur_percentage"]
chf_gross_factor = 1 + mt_df["vat_chf_percentage"]
mt_df["sales_price_eur"] = mt_df["total_booking_net_EUR"] * eur_gross_factor
mt_df["sales_price_chf"] = mt_df["total_booking_net_CHF"] * chf_gross_factor

In [897]:
## stack the calculated rows and the no-calculation rows into one frame with a fresh index
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): the cells after this one keep modifying mt_df only; those changes are
# not part of mt_df_final, which is the frame that gets exported — confirm.
mt_df_final = pd.concat([mt_df, mt_df_nocalcs], ignore_index=True)

## convert revenue_month_date to date
mt_df_final["revenue_month_date"] = pd.to_datetime(
    mt_df_final["revenue_month_date"]
).dt.date

In [898]:
## transaction ids of 'day' skus (1, 3 or 7 days) whose term crosses a month boundary,
## i.e. that have a row with revenue_month_number > 1
trx_lst_more_1month_subs = mt_df.loc[
    (mt_df["revenue_month_number"] > 1) & (mt_df["sku"].str.contains("day")),
    "transaction_id",
]
in_spanning_trx = mt_df["transaction_id"].isin(trx_lst_more_1month_subs)

## first revenue month: days from term_start up to the start of the following revenue month
# The following revenue month is the NEXT row of the same transaction, hence shift(-1)
# within the transaction_id group. The previous code used shift(1) on the whole column,
# which reads the preceding row — a row of a different transaction.
first_month_rows = in_spanning_trx & (mt_df["revenue_month_number"] == 1)
next_month_start = mt_df.groupby("transaction_id")["revenue_month_date"].shift(-1)
days_in_first_month = -(mt_df["term_start"] - next_month_start).dt.days
mt_df.loc[first_month_rows, "product_term_length"] = days_in_first_month[
    first_month_rows
]

## later revenue months: days from the start of that month up to term_end
later_month_rows = in_spanning_trx & (mt_df["revenue_month_number"] > 1)
days_until_term_end = (mt_df["term_end"] - mt_df["revenue_month_date"]).dt.days
mt_df.loc[later_month_rows, "product_term_length"] = days_until_term_end[
    later_month_rows
]

In [899]:
## fix active_sub_month_end: a term that ends before the last day of its month is not active at month end
# NOTE(review): the original heading limits this to subscriptions with a single
# revenue month and a length < 30 days, but the mask below applies to every row — confirm.
mt_df["last_day_term_end"] = mt_df["term_end"].dt.day
# Number of days in term_end's own month. Adding MonthEnd(1) instead would jump to
# the following month whenever term_end already falls on a month end.
mt_df["last_day_month"] = mt_df["term_end"].dt.days_in_month

ends_before_month_end = mt_df["last_day_term_end"] < mt_df["last_day_month"]
mt_df.loc[ends_before_month_end, "active_sub_month_end"] = 0

In [900]:
## fix those one line transactions that still have wrong product_term_length
trx_id_counts = mt_df["transaction_id"].value_counts(sort=False)
single_row_trx_ids = trx_id_counts.index[trx_id_counts == 1]
# Index labels of the rows that are the only row of their transaction.
check_length_mask = mt_df.index[
    mt_df["transaction_id"].isin(single_row_trx_ids)
].to_list()

# Difference of the day-of-month numbers of term_end and term_start.
# NOTE(review): this is not the elapsed number of days when the two dates are in
# different months (e.g. the 1st of one month to the 1st of the next gives 0) — confirm.
day_of_month_diff = mt_df["term_end"].dt.day - mt_df["term_start"].dt.day
mt_df.loc[check_length_mask, "product_term_length"] = day_of_month_diff.loc[
    check_length_mask
]

In [901]:
## keep only the listed columns of mt_df, in this order
ordered_columns = [
    "transaction_id",
    "zuid",
    "payment_method",
    "sku",
    "type_of_transaction",
    "country_name",
    "transaction_date",
    "total_booking_net_CHF",
    "total_booking_net_EUR",
    "vat_chf",
    "vat_eur",
    "term_start",
    "term_end",
    "product_term_length",
    "product_term_length_months",
    "product_group_finance",
    "revenue_month_number",
    "revenue_month_date",
    "total_revenue_net_EUR",
    "total_revenue_net_CHF",
    "sales_price_eur",
    "sales_price_chf",
    "active_sub_month_end",
]
mt_df = mt_df[ordered_columns]

## BigQuery table schema: one {"name", "type"} entry per column, built from (name, type) pairs
_schema_fields = [
    ("transaction_id", "STRING"),
    ("zuid", "INTEGER"),
    ("payment_method", "STRING"),
    ("sku", "STRING"),
    ("type_of_transaction", "STRING"),
    ("transaction_date", "TIMESTAMP"),
    ("country_name", "STRING"),
    ("total_booking_net_CHF", "FLOAT"),
    ("total_booking_net_EUR", "FLOAT"),
    ("vat_chf", "FLOAT"),
    ("vat_eur", "FLOAT"),
    ("term_start", "TIMESTAMP"),
    ("term_end", "TIMESTAMP"),
    ("product_term_length", "INTEGER"),
    ("product_term_length_months", "INTEGER"),
    ("product_group_finance", "STRING"),
    ("revenue_month_number", "INTEGER"),
    ("revenue_month_date", "DATE"),
    ("total_revenue_net_EUR", "FLOAT"),
    ("total_revenue_net_CHF", "FLOAT"),
    ("sales_price_eur", "FLOAT"),
    ("sales_price_chf", "FLOAT"),
    ("active_sub_month_end", "INTEGER"),
]
bq_schema = [
    {"name": field_name, "type": field_type}
    for field_name, field_type in _schema_fields
]

In [902]:
## write mt_df_final to BigQuery; an existing destination table is replaced
# NOTE(review): the exported frame is mt_df_final, which was assembled before the
# later corrections and the column reordering applied to mt_df — confirm.
pandas_gbq.to_gbq(
    dataframe=mt_df_final,
    destination_table="temp.pypayment_v2_1_022022",
    project_id="zattoo-dataeng",
    table_schema=bq_schema,
    if_exists="replace",
    progress_bar=None,
)

In [903]:
## write mt_df_final to a csv file as well
# NOTE(review): machine-specific absolute output directory.
path = r"/Users/miguelcouto/Desktop/"
csv_target = os.path.join(path, r"pypayment_v2_1_022022.csv")

mt_df_final.to_csv(csv_target)