In [203]:
#%load_ext lab_black

In [204]:
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas_gbq
import pandas as pd
import numpy as np
import os
import warnings

# Silences every warning for the whole session (this includes pandas'
# chained-assignment warnings), so problems will not surface in cell output.
warnings.filterwarnings('ignore')

# Service-account key file used to build the BigQuery credentials.
# NOTE(review): `credentials` and `client` are not passed to the pandas_gbq
# calls visible in this notebook — confirm whether they are still needed.
key_file = "/Users/miguelcouto/Downloads/zattoo-dataeng-e5f45785174f.json"
credentials = service_account.Credentials.from_service_account_file(key_file)

project_id = "zattoo-dataeng"
client = bigquery.Client(project=project_id, credentials=credentials)

In [205]:
# Query for all 'datatrans' transactions of the reporting month (February 2022),
# excluding refunds, enriched with the finance product group taken from
# product_service_view.zuya_account_permission.
#
# - Every country other than Germany / Austria is reported as 'Switzerland'.
# - The product views are joined on their daily snapshot (inserted_at_date)
#   matching the transaction date.
# - The month is selected with a half-open interval (>= first day,
#   < first day of the next month) so that timestamps carrying fractional
#   seconds in the last second of the month are not dropped.
# - The reporting month is hard-coded here; the export table / CSV names
#   further down embed the same month and must be changed together.
sql_calcs = """
   select mt.transaction_id,
       mt.zuid,
       mt.payment_method,
       mt.sku,
       mt.type_of_transaction,
       case
           when mt.country_name = 'Germany' then 'Germany'
           when mt.country_name = 'Austria' then 'Austria'
           else 'Switzerland' end as country_name,
       mt.new_booking_net_CHF,
       mt.renewal_booking_net_CHF,
       mt.new_booking_net_EUR,
       mt.renewal_booking_net_EUR,
       mt.transaction_date,
       mt.term_start,
       mt.term_end,
       product_service_view.zuya_account_permission as product_group_finance
from b2c_middleware.middlelayer_transactions mt
         LEFT JOIN b2c_middleware_import.product_product_view AS product_product_view
                   ON mt.sku = product_product_view.sku
                       AND DATE(mt.transaction_date) = product_product_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_subscriptionproduct_view AS product_subscriptionproduct_view
                   ON product_product_view.id = product_subscriptionproduct_view.id
                       AND
                      product_product_view.inserted_at_date = product_subscriptionproduct_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_offer_view AS product_offer_view
                   ON product_subscriptionproduct_view.offer_id = product_offer_view.id
                       AND product_subscriptionproduct_view.inserted_at_date = product_offer_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_service_view AS product_service_view
                   ON product_offer_view.service_id = product_service_view.id
                       AND product_offer_view.inserted_at_date = product_service_view.inserted_at_date
where true
  and mt.app_shop_id = 'datatrans'
  and mt.transaction_date >= '2022-02-01 00:00:00'
  and mt.transaction_date < '2022-03-01 00:00:00'
-- NEW LINE! added on 2022-04-27
  and mt.type_of_transaction != 'refund'
           """

In [206]:
## prepare dataframe
# Run the query; pandas_gbq resolves credentials on its own here.
df = pandas_gbq.read_gbq(sql_calcs, project_id=project_id, progress_bar_type=None)

# transaction types that go through the month-by-month revenue calculation
type_trans_lst = ["new_sale", "renewal", "refund"]
# term boundary columns that are made timezone-naive below
parse_dates = ["term_start", "term_end"]

needs_calcs = df["type_of_transaction"].isin(type_trans_lst)
mt_df = df[needs_calcs].copy()

for term_col in parse_dates:
    as_datetime = pd.to_datetime(mt_df[term_col])
    mt_df[term_col] = as_datetime.dt.tz_convert(None)

In [207]:
## calculate total_booking_net columns
# total booking per currency = new-sale part + renewal part
for currency in ("CHF", "EUR"):
    new_part = mt_df[f"new_booking_net_{currency}"]
    renewal_part = mt_df[f"renewal_booking_net_{currency}"]
    mt_df[f"total_booking_net_{currency}"] = new_part + renewal_part

In [208]:
## drop unnecessary columns
# the per-type booking parts are no longer needed once the totals exist
booking_part_cols = [
    "new_booking_net_EUR",
    "new_booking_net_CHF",
    "renewal_booking_net_CHF",
    "renewal_booking_net_EUR",
]
mt_df = mt_df.drop(columns=booking_part_cols)

In [209]:
## calculate product_term_length_months
# Number of calendar months touched by the term, counting both the month of
# term_start and the month of term_end (hence the + 1).
year_gap = mt_df["term_end"].dt.year - mt_df["term_start"].dt.year
month_gap = mt_df["term_end"].dt.month - mt_df["term_start"].dt.month

# Negative results (term_end in an earlier month than term_start) are floored
# at 0. clip() replaces the former chained assignment
# `mt_df[col][mask] = 0`, which is not guaranteed to write back into mt_df.
mt_df["product_term_length_months"] = (year_gap * 12 + month_gap + 1).clip(lower=0)

In [210]:
## expand to one row per revenue month
# Every row is repeated product_term_length_months times (a count of 0 drops
# the row). The index labels are repeated as well, so the index is not unique
# until it is reset further down.
repeated_labels = mt_df.index.repeat(mt_df["product_term_length_months"])
mt_df = mt_df.reindex(repeated_labels)

In [211]:
## add revenue_month_number
# 1-based running number of each row within its transaction_id
rows_per_transaction = mt_df.groupby(["transaction_id"])
mt_df["revenue_month_number"] = rows_per_transaction.cumcount().add(1)

In [215]:
## add revenue_month_date
# Anchor month of each transaction: the month of term_start, except for
# refunds, which are anchored on the month of the transaction itself.
is_refund = (mt_df["type_of_transaction"] == "refund").to_numpy()

# transaction_date is not tz-stripped anywhere above (only term_start and
# term_end are), so normalise it to naive UTC before mixing it with term_start.
txn_naive = pd.to_datetime(mt_df["transaction_date"], utc=True).dt.tz_convert(None)

anchor_month = np.where(
    is_refund, txn_naive.to_numpy(), mt_df["term_start"].to_numpy()
).astype("datetime64[M]")

# Row k (1-based) of a transaction belongs to the (k - 1)-th month after the
# anchor month. Month-resolution datetime64 arithmetic yields the first day of
# that month for the whole column at once; this is the same result as the
# former row-wise `+ MonthEnd(k) + MonthBegin(-1)` apply, without the Python
# loop and without chained assignment.
month_shift = (mt_df["revenue_month_number"].to_numpy() - 1).astype("timedelta64[M]")
mt_df["revenue_month_date"] = (anchor_month + month_shift).astype("datetime64[ns]")

## product_group_finance now comes from product_service_view.zuya_account_permission
## in the query, so it is no longer initialised to NaN here.

## product_term_length: days of the term falling into each revenue month
# default: the full length of the revenue month
full_month_days = mt_df["revenue_month_date"].dt.days_in_month

# rows whose term starts after the first instant of the revenue month only
# count the days from term_start (inclusive) to the end of that month
starts_inside_month = mt_df["term_start"] > mt_df["revenue_month_date"]
first_month_days = mt_df["term_start"].dt.daysinmonth - mt_df["term_start"].dt.day + 1

# np.where works positionally, so the (still duplicated) index is irrelevant
mt_df["product_term_length"] = np.where(
    starts_inside_month, first_month_days, full_month_days
)

In [217]:
## resetting index
# replace the duplicated labels left by the row expansion with a fresh 0..n-1 index
mt_df = mt_df.reset_index(drop=True)

In [218]:
## set active_sub_month_end = 1 by default
mt_df["active_sub_month_end"] = 1

## get last indices of each transaction_id group
# Boolean marker of the last row of every transaction_id, plus the matching
# positional indices (ascending). last_idxs is reused by a later cell.
is_last_row = ~mt_df["transaction_id"].duplicated(keep="last")
last_idxs = np.flatnonzero(is_last_row.to_numpy())

## the last row of each transaction gets 0
# .loc writes straight into mt_df; the former chained `mt_df[col].iloc[...] = 0`
# is not guaranteed to do so.
mt_df.loc[is_last_row, "active_sub_month_end"] = 0

## mark all one transaction subscriptions as 1
# rows that are both the first revenue month and belong to a term of at most
# one month are set back to 1, overriding the 0 assigned above
single_month = (mt_df["revenue_month_number"] <= 1) & (
    mt_df["product_term_length_months"] <= 1
)
mt_df.loc[single_month, "active_sub_month_end"] = 1

## mark all refund transactions as -1
# (this statement used to appear twice; once is enough)
mt_df.loc[mt_df["type_of_transaction"] == "refund", "active_sub_month_end"] = -1

In [223]:
## fix last position of product_term_length per transaction_id
# Whole days between the start of the last revenue month and term_end,
# computed for the last row of every transaction only.
# NOTE(review): the sample output pasted below shows 6 for a term ending on
# 2022-05-06 18:31, which matches the disabled `+ 1` variant rather than this
# expression (which yields 5) — confirm which one is intended.
last_row_days = (
    mt_df["term_end"].iloc[last_idxs] - mt_df["revenue_month_date"].iloc[last_idxs]
).dt.days  # + 1

# Positional write into mt_df itself; the former chained
# `mt_df[col].iloc[...] = ...` is not guaranteed to update the frame.
mt_df.iloc[last_idxs, mt_df.columns.get_loc("product_term_length")] = (
    last_row_days.to_numpy()
)

In [224]:
# spot check: show all revenue-month rows of one sample transaction
mt_df.loc[mt_df["transaction_id"].eq("35777994")]

Unnamed: 0,transaction_id,zuid,payment_method,sku,type_of_transaction,country_name,transaction_date,term_start,term_end,product_group_finance,total_booking_net_CHF,total_booking_net_EUR,product_term_length_months,revenue_month_number,revenue_month_date,product_term_length,active_sub_month_end
233612,35777994,6830309,paypal,zattoo_wdp_hiq_german_3mo,renewal,Germany,2022-02-02 04:09:35,2022-02-03 18:31:53,2022-05-06 18:31:53,base_hiq,24.566282,23.521008,4,1,2022-02-01,26,1
233613,35777994,6830309,paypal,zattoo_wdp_hiq_german_3mo,renewal,Germany,2022-02-02 04:09:35,2022-02-03 18:31:53,2022-05-06 18:31:53,base_hiq,24.566282,23.521008,4,2,2022-03-01,31,1
233614,35777994,6830309,paypal,zattoo_wdp_hiq_german_3mo,renewal,Germany,2022-02-02 04:09:35,2022-02-03 18:31:53,2022-05-06 18:31:53,base_hiq,24.566282,23.521008,4,3,2022-04-01,30,1
233615,35777994,6830309,paypal,zattoo_wdp_hiq_german_3mo,renewal,Germany,2022-02-02 04:09:35,2022-02-03 18:31:53,2022-05-06 18:31:53,base_hiq,24.566282,23.521008,4,4,2022-05-01,6,0


In [179]:
## TEMP FIX to tackle dynamic term_end for refunds
# Negative day counts are floored at 0. clip() replaces the former chained
# assignment `mt_df[col][mask] = 0`, which is not guaranteed to write back
# into mt_df.
mt_df["product_term_length"] = mt_df["product_term_length"].clip(lower=0)

In [180]:
## total_days of product_term_length per transaction_id
# sum of product_term_length over all rows of a transaction, broadcast back
# onto every row of that transaction
term_length_by_txn = mt_df.groupby("transaction_id")["product_term_length"]
mt_df["total_days"] = term_length_by_txn.transform("sum")

In [183]:
## NEW LINE! added on 2022-04-27
## product_term_length fix for subscriptions < 31 days (1, 3, 7 days)
# SKUs whose name contains 'day' get the plain term_end - term_start day count.
# na=False keeps rows with a missing sku out of the mask instead of producing
# a NaN mask that cannot be used for selection.
# NOTE(review): total_days was already computed in an earlier cell from the
# pre-fix values, so for these SKUs the revenue split computed afterwards may
# no longer add up to the booking amount — confirm the intended order.
is_day_sku = mt_df["sku"].str.contains("day", na=False).to_numpy()
term_days = (mt_df["term_end"] - mt_df["term_start"]).dt.days

# positional replacement instead of the former chained assignment
mt_df["product_term_length"] = np.where(
    is_day_sku, term_days, mt_df["product_term_length"]
)

In [111]:
## calculate total_revenue_net fields
# Revenue of a row = booking amount / total days of the transaction * days of
# the term falling into this revenue month. Rows with total_days == 0 produce
# NaN / inf here. Must run before the booking amounts are zeroed below.
for currency in ("EUR", "CHF"):
    mt_df[f"total_revenue_net_{currency}"] = (
        mt_df[f"total_booking_net_{currency}"]
        / mt_df["total_days"]
        * mt_df["product_term_length"]
    )

## remove total_booking values from all lines of group except first
not_first_month = mt_df["revenue_month_number"] > 1
mt_df.loc[not_first_month, ["total_booking_net_CHF", "total_booking_net_EUR"]] = 0.0

## remove VAT values from all lines of group except first
vat_cols = ["vat_CHF", "vat_EUR"]

# The query in this notebook does not select the VAT columns. Previously they
# only came into existence as a side effect of `.loc` assignment (NaN on the
# first row of each transaction, 0.0 elsewhere); create them explicitly so the
# result no longer depends on that implicit behaviour.
# NOTE(review): confirm whether vat_CHF / vat_EUR should be added to the query.
for col in vat_cols:
    if col not in mt_df.columns:
        mt_df[col] = np.nan

# one assignment for both VAT columns; the old loop also re-zeroed
# total_booking_net_EUR on every pass, which the statement above already covers
mt_df.loc[not_first_month, vat_cols] = 0.0

In [112]:
## drop total_days column and reorder dataframe
# total_days was only a helper for the revenue split; the remaining columns are
# put into the order used for the export
final_column_order = [
    "transaction_id",
    "zuid",
    "payment_method",
    "sku",
    "type_of_transaction",
    "country_name",
    "transaction_date",
    "total_booking_net_CHF",
    "total_booking_net_EUR",
    "vat_CHF",
    "vat_EUR",
    "term_start",
    "term_end",
    "product_term_length",
    "product_term_length_months",
    "product_group_finance",
    "revenue_month_number",
    "revenue_month_date",
    "total_revenue_net_EUR",
    "total_revenue_net_CHF",
    "active_sub_month_end",
]
mt_df = mt_df.drop(columns=["total_days"])
mt_df = mt_df[final_column_order]

In [113]:
## prepare df_nocalcs for free trials and full discounts
# every transaction type that is NOT in type_trans_lst skips the revenue split
skips_calcs = ~df["type_of_transaction"].isin(type_trans_lst)
mt_df_nocalcs = df[skips_calcs].copy()

# make the term boundaries timezone-naive, as for mt_df
for term_col in parse_dates:
    as_datetime = pd.to_datetime(mt_df_nocalcs[term_col])
    mt_df_nocalcs[term_col] = as_datetime.dt.tz_convert(None)

In [114]:
## calculate total_booking_net columns
# total booking per currency = new-sale part + renewal part
for currency in ("CHF", "EUR"):
    new_part = mt_df_nocalcs[f"new_booking_net_{currency}"]
    renewal_part = mt_df_nocalcs[f"renewal_booking_net_{currency}"]
    mt_df_nocalcs[f"total_booking_net_{currency}"] = new_part + renewal_part

In [115]:
## drop unnecessary columns
# the per-type booking parts are no longer needed once the totals exist
booking_part_cols = [
    "new_booking_net_EUR",
    "new_booking_net_CHF",
    "renewal_booking_net_CHF",
    "renewal_booking_net_EUR",
]
mt_df_nocalcs = mt_df_nocalcs.drop(columns=booking_part_cols)

In [116]:
## append dataframes
# Stack the calculated rows and the no-calculation rows into one frame with a
# fresh 0..n-1 index. Columns present in only one of the two frames are filled
# with NaN / NaT in the other. pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0.
mt_df_final = pd.concat([mt_df, mt_df_nocalcs], ignore_index=True)

## convert revenue_month_date to date
# plain datetime.date objects, matching the DATE type declared in the BQ schema
mt_df_final["revenue_month_date"] = pd.to_datetime(
    mt_df_final["revenue_month_date"]
).dt.date

In [117]:
# mt_df_final['term_end_date'] = pd.to_datetime(mt_df_final["term_end"].dt.strftime('%Y-%m-%d'))

In [118]:
# mt_df_final

In [119]:
# mt_df_final[mt_df_final.transaction_id == '27355744']

In [120]:
# mt_df_final[['transaction_id','transaction_date','term_start', 'term_end', 'revenue_month_date','active_sub_month_end']][mt_df_final.transaction_id == '27716044']

In [121]:
# mt_df_final[mt_df_final['term_end_date'] == '2021-06-30']

In [122]:
## define table schema
# (column name, BigQuery type) pairs, expanded below into the list of
# {"name": ..., "type": ...} dicts that is passed as table_schema on export
_schema_fields = (
    ("transaction_id", "STRING"),
    ("zuid", "INTEGER"),
    ("payment_method", "STRING"),
    ("sku", "STRING"),
    ("type_of_transaction", "STRING"),
    ("transaction_date", "TIMESTAMP"),
    ("country_name", "STRING"),
    ("total_booking_net_CHF", "FLOAT"),
    ("total_booking_net_EUR", "FLOAT"),
    ("vat_CHF", "FLOAT"),
    ("vat_EUR", "FLOAT"),
    ("term_start", "TIMESTAMP"),
    ("term_end", "TIMESTAMP"),
    ("product_term_length", "INTEGER"),
    ("product_term_length_months", "INTEGER"),
    ("product_group_finance", "STRING"),
    ("revenue_month_number", "INTEGER"),
    ("revenue_month_date", "DATE"),
    ("total_revenue_net_EUR", "FLOAT"),
    ("total_revenue_net_CHF", "FLOAT"),
    ("active_sub_month_end", "INTEGER"),
)
bq_schema = [
    {"name": field_name, "type": field_type}
    for field_name, field_type in _schema_fields
]

In [123]:
## export to BQ table
# Overwrites the destination table on every run (if_exists="replace").
# The project comes from the project_id variable defined in the setup cell —
# the same one read_gbq uses — instead of a second hard-coded copy of the name.
# The table name embeds the reporting month and must match the query's month.
pandas_gbq.to_gbq(
    dataframe=mt_df_final,
    destination_table="temp.pypayment_v2_1_022022",
    project_id=project_id,
    if_exists="replace",
    progress_bar=None,
    table_schema=bq_schema,
)

In [124]:
## export to csv
# local copy of the export; to_csv also writes the index as a first, unnamed column
path = "/Users/miguelcouto/Desktop/"
csv_file = os.path.join(path, "pypayment_v2_1_022022.csv")

mt_df_final.to_csv(csv_file)