In [123]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [127]:
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas_gbq
import pandas as pd
import numpy as np
import os
import warnings

# NOTE(review): this silences every warning raised in the notebook, pandas
# warnings included; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Location of the service-account key. GOOGLE_APPLICATION_CREDENTIALS wins when
# it is set; otherwise the original local path is used so existing runs behave
# exactly as before.
credentials_path = os.environ.get(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/Users/miguelcouto/Downloads/zattoo-dataeng-e5f45785174f.json",
)
credentials = service_account.Credentials.from_service_account_file(
    credentials_path
)

# Project used both for the BigQuery client and for the pandas_gbq calls.
project_id = "zattoo-dataeng"
client = bigquery.Client(credentials=credentials, project=project_id)

In [128]:
# Query text for the February 2022 'datatrans' transactions, enriched with the
# finance product group via the product -> subscription product -> offer ->
# service views (each joined on id and on the same inserted_at_date snapshot).
# The date window is half-open (>= first day, < first day of the next month) so
# that timestamps carrying fractional seconds in the last second of the month
# are not dropped.
sql_calcs = """
   select mt.transaction_id,
       mt.zuid,
       mt.payment_method,
       mt.sku,
       mt.type_of_transaction,
       case
           when mt.country_name = 'Germany' then 'Germany'
           when mt.country_name = 'Austria' then 'Austria'
           else 'Switzerland' end as country_name,
       mt.new_booking_net_CHF,
       mt.renewal_booking_net_CHF,
       mt.new_booking_net_EUR,
       mt.renewal_booking_net_EUR,
       mt.transaction_date,
       mt.term_start,
       mt.term_end,
       product_service_view.zuya_account_permission as product_group_finance
from b2c_middleware.middlelayer_transactions mt
         LEFT JOIN b2c_middleware_import.product_product_view AS product_product_view
                   ON mt.sku = product_product_view.sku
                       AND DATE(mt.transaction_date) = product_product_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_subscriptionproduct_view AS product_subscriptionproduct_view
                   ON product_product_view.id = product_subscriptionproduct_view.id
                       AND
                      product_product_view.inserted_at_date = product_subscriptionproduct_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_offer_view AS product_offer_view
                   ON product_subscriptionproduct_view.offer_id = product_offer_view.id
                       AND product_subscriptionproduct_view.inserted_at_date = product_offer_view.inserted_at_date
         LEFT JOIN b2c_middleware_import.product_service_view AS product_service_view
                   ON product_offer_view.service_id = product_service_view.id
                       AND product_offer_view.inserted_at_date = product_service_view.inserted_at_date
where true
  and mt.app_shop_id = 'datatrans'
  and mt.transaction_date >= '2022-02-01 00:00:00'
  and mt.transaction_date < '2022-03-01 00:00:00'
           """

In [129]:
## prepare dataframe
# Run the query once; `df` keeps every transaction type and is reused further
# down for the rows that need no revenue calculation.
df = pandas_gbq.read_gbq(
    sql_calcs,
    project_id=project_id,
    progress_bar_type=None,
    credentials=credentials,
)

# Transaction types that go through the revenue-spreading calculation.
type_trans_lst = ["new_sale", "renewal", "refund"]
# Term boundary columns that are converted to timezone-naive datetimes.
parse_dates = ["term_start", "term_end"]

needs_calculation = df["type_of_transaction"].isin(type_trans_lst)
mt_df = df.loc[needs_calculation].copy()

for date in parse_dates:
    parsed_column = pd.to_datetime(mt_df[date])
    mt_df[date] = parsed_column.dt.tz_convert(None)

In [130]:
## calculate total_booking_net columns
# Total booking per currency = new booking + renewal booking.
mt_df["total_booking_net_CHF"] = mt_df["new_booking_net_CHF"].add(
    mt_df["renewal_booking_net_CHF"]
)
mt_df["total_booking_net_EUR"] = mt_df["new_booking_net_EUR"].add(
    mt_df["renewal_booking_net_EUR"]
)

In [131]:
## drop unnecessary columns
# The per-type booking columns are no longer needed once the totals exist.
booking_component_columns = [
    "new_booking_net_EUR",
    "new_booking_net_CHF",
    "renewal_booking_net_CHF",
    "renewal_booking_net_EUR",
]
mt_df = mt_df.drop(columns=booking_component_columns)

In [132]:
## calculate product_term_length_months
# Number of calendar months the term touches: the month difference between
# term_end and term_start, plus one so that a term starting and ending in the
# same month counts as 1.
term_month_span = (
    (mt_df["term_end"].dt.year - mt_df["term_start"].dt.year) * 12
    + (mt_df["term_end"].dt.month - mt_df["term_start"].dt.month)
    + 1
)

# Negative spans are floored at 0. clip() replaces the former chained
# assignment (df[col][mask] = 0), which is not guaranteed to write back into
# the frame and is a no-op under pandas copy-on-write.
mt_df["product_term_length_months"] = term_month_span.clip(lower=0)

In [133]:
## repeat every row once per month of its term
# Rows whose product_term_length_months is 0 disappear; the repeated rows keep
# their original index label, so the index is no longer unique afterwards.
rows_per_transaction = mt_df["product_term_length_months"]
repeated_labels = mt_df.index.repeat(rows_per_transaction)
mt_df = mt_df.reindex(repeated_labels)

In [134]:
## add revenue_month_number
# 1-based running counter of the rows inside each transaction_id group.
zero_based_position = mt_df.groupby("transaction_id").cumcount()
mt_df["revenue_month_number"] = zero_based_position + 1

In [135]:
# Display the frame for a visual check after revenue_month_number was added.
mt_df

Unnamed: 0,transaction_id,zuid,payment_method,sku,type_of_transaction,country_name,transaction_date,term_start,term_end,product_group_finance,total_booking_net_CHF,total_booking_net_EUR,product_term_length_months,revenue_month_number
0,35804150,9968015,paypal,zattoo_web_polish_german_1_mo,new_sale,Germany,2022-02-04 06:17:26,2022-02-05 16:21:57,2022-03-08 16:21:57,polish,6.933677,6.638655,2,1
0,35804150,9968015,paypal,zattoo_web_polish_german_1_mo,new_sale,Germany,2022-02-04 06:17:26,2022-02-05 16:21:57,2022-03-08 16:21:57,polish,6.933677,6.638655,2,2
1,35804293,30253928,paypal,zattoo_web_polish_german_1_mo,new_sale,Germany,2022-02-04 06:26:54,2022-02-05 20:16:55,2022-03-08 20:16:55,polish,6.933677,6.638655,2,1
1,35804293,30253928,paypal,zattoo_web_polish_german_1_mo,new_sale,Germany,2022-02-04 06:26:54,2022-02-05 20:16:55,2022-03-08 20:16:55,polish,6.933677,6.638655,2,2
2,35804319,30254161,paypal,zattoo_web_polish_german_1_mo,new_sale,Germany,2022-02-04 06:28:47,2022-02-05 21:27:04,2022-03-08 21:27:04,polish,6.933677,6.638655,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122154,35817318,30254117,klarna,zattoo_web_cirkus_german_1mo,renewal,Germany,2022-02-05 08:41:24,2022-02-06 14:05:33,2022-03-09 14:05:33,cirkus,3.501946,3.352941,2,2
122155,35893326,29242160,paypal,zattoo_web_russian_bundle_german_1mo,renewal,Germany,2022-02-11 09:08:58,2022-02-12 11:34:12,2022-03-15 11:34:12,russian,9.566719,9.159664,2,1
122155,35893326,29242160,paypal,zattoo_web_russian_bundle_german_1mo,renewal,Germany,2022-02-11 09:08:58,2022-02-12 11:34:12,2022-03-15 11:34:12,russian,9.566719,9.159664,2,2
122156,35817492,30255408,credit_card,zattoo_web_cirkus_austrian_1mo,renewal,Austria,2022-02-05 08:51:31,2022-02-06 19:33:48,2022-03-09 19:33:48,cirkus,3.472763,3.325000,2,1


In [136]:
## add revenue_month_date
# First day of the month in which revenue starts: the term_start month for
# every row, except refunds, which use the transaction_date month. np.where
# picks per row by position, replacing a chained masked assignment that is not
# guaranteed to write back into the frame.
term_start_month = mt_df["term_start"].to_numpy().astype("datetime64[M]")
transaction_month = mt_df["transaction_date"].to_numpy().astype("datetime64[M]")
is_refund = (mt_df["type_of_transaction"] == "refund").to_numpy()
first_revenue_month = np.where(is_refund, transaction_month, term_start_month)

# Row k of a transaction (revenue_month_number == k) belongs to the first
# revenue month shifted forward by k - 1 whole months. This is the same result
# the former row-wise `+ MonthEnd(k) + MonthBegin(-1)` produced for a
# first-of-month date with k >= 1, computed in one vectorized step.
month_shift = (mt_df["revenue_month_number"].to_numpy() - 1).astype(
    "timedelta64[M]"
)
mt_df["revenue_month_date"] = (first_revenue_month + month_shift).astype(
    "datetime64[ns]"
)

## product_group_finance comes from the query
## (product_service_view.zuya_account_permission), so it is no longer
## overwritten with NaN here.

# Number of days in each revenue month.
mt_df["product_term_length"] = (
    mt_df["revenue_month_date"].dt.days_in_month.astype("int64")
)

In [137]:
## resetting index
# Replace the repeated index labels with a fresh 0..n-1 range so that label and
# position coincide from here on.
mt_df = mt_df.reset_index(drop=True)

In [138]:
## set active_sub_content = 1 by default
mt_df["active_sub_content"] = 1

## get last indices of each transaction_id group
# np.unique on the reversed id array returns, per id, its first occurrence
# counted from the end; `len - idx - 1` maps that back to the position of the
# last row carrying that transaction_id.
last_idxs = (
    len(mt_df)
    - np.unique(mt_df.transaction_id.values[::-1], return_index=True)[1]
    - 1
)

# All writes below go through .iloc/.loc on the frame itself. The former
# chained form (df[col][mask] = value) is not guaranteed to modify mt_df and
# does nothing under pandas copy-on-write.

## the last row of every transaction is marked 0
active_col = mt_df.columns.get_loc("active_sub_content")
mt_df.iloc[last_idxs, active_col] = 0

## rows of transactions spanning at most one month are set back to 1
single_month_term = (mt_df["revenue_month_number"] <= 1) & (
    mt_df["product_term_length_months"] <= 1
)
mt_df.loc[single_month_term, "active_sub_content"] = 1

## mark all refund transactions as -1
mt_df.loc[mt_df["type_of_transaction"] == "refund", "active_sub_content"] = -1

In [139]:
## fix last position of product_term_length per transaction_id
# For the last row of each transaction the day count is the number of whole
# days between that row's revenue_month_date and term_end, plus one.
last_month_days = (
    mt_df["term_end"].iloc[last_idxs]
    - mt_df["revenue_month_date"].iloc[last_idxs]
).dt.days + 1

# Write positionally into the frame itself; the former chained
# `mt_df[col].iloc[...] = ...` is not guaranteed to modify mt_df.
mt_df.iloc[last_idxs, mt_df.columns.get_loc("product_term_length")] = (
    last_month_days.to_numpy()
)

In [140]:
## TEMP FIX to tackle dynamic term_end for refunds
# Negative day counts are floored at 0. clip() replaces a chained assignment
# (df[col][mask] = 0) that is not guaranteed to write back into the frame.
mt_df["product_term_length"] = mt_df["product_term_length"].clip(lower=0)

In [141]:
## total_days of product_term_length per transaction_id
# Sum of product_term_length over each transaction, broadcast to all its rows.
days_by_transaction = mt_df.groupby("transaction_id")["product_term_length"]
mt_df["total_days"] = days_by_transaction.transform("sum")

In [142]:
## calculate total_revenue_net fields
# Per row: booking amount divided by the transaction's total days, multiplied
# by the days that row covers (same operation order as before).
term_days = mt_df["total_days"]
row_days = mt_df["product_term_length"]

mt_df["total_revenue_net_EUR"] = (
    mt_df["total_booking_net_EUR"].div(term_days).mul(row_days)
)
mt_df["total_revenue_net_CHF"] = (
    mt_df["total_booking_net_CHF"].div(term_days).mul(row_days)
)

## remove total_booking values from all lines of group except first
booking_columns = ["total_booking_net_CHF", "total_booking_net_EUR"]
after_first_month = mt_df["revenue_month_number"] > 1
mt_df.loc[after_first_month, booking_columns] = 0.0

In [143]:
## drop total_days column
# total_days was only a helper for the revenue split.
mt_df = mt_df.drop(columns=["total_days"])

## reorder dataframe
final_column_order = [
    "transaction_id",
    "zuid",
    "payment_method",
    "sku",
    "type_of_transaction",
    "country_name",
    "transaction_date",
    "total_booking_net_CHF",
    "total_booking_net_EUR",
    "term_start",
    "term_end",
    "product_term_length",
    "product_term_length_months",
    "product_group_finance",
    "revenue_month_number",
    "revenue_month_date",
    "total_revenue_net_EUR",
    "total_revenue_net_CHF",
    "active_sub_content",
]
mt_df = mt_df[final_column_order]

In [144]:
## prepare df_nocalcs for free trials and full discounts
# Every row whose type_of_transaction is NOT in type_trans_lst; these rows skip
# the revenue-spreading steps.
skips_calculation = ~df["type_of_transaction"].isin(type_trans_lst)
mt_df_nocalcs = df.loc[skips_calculation].copy()

for date in parse_dates:
    parsed_column = pd.to_datetime(mt_df_nocalcs[date])
    mt_df_nocalcs[date] = parsed_column.dt.tz_convert(None)

In [145]:
## calculate total_booking_net columns
# Total booking per currency = new booking + renewal booking.
mt_df_nocalcs["total_booking_net_CHF"] = mt_df_nocalcs["new_booking_net_CHF"].add(
    mt_df_nocalcs["renewal_booking_net_CHF"]
)
mt_df_nocalcs["total_booking_net_EUR"] = mt_df_nocalcs["new_booking_net_EUR"].add(
    mt_df_nocalcs["renewal_booking_net_EUR"]
)

In [146]:
## drop unnecessary columns
# The per-type booking columns are no longer needed once the totals exist.
nocalcs_component_columns = [
    "new_booking_net_EUR",
    "new_booking_net_CHF",
    "renewal_booking_net_CHF",
    "renewal_booking_net_EUR",
]
mt_df_nocalcs = mt_df_nocalcs.drop(columns=nocalcs_component_columns)

In [147]:
## append dataframes
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. pd.concat
# stacks the calculated rows and the no-calculation rows the same way, and
# ignore_index=True renumbers the result 0..n-1 (the former reset_index).
mt_df_final = pd.concat([mt_df, mt_df_nocalcs], ignore_index=True)

## convert revenue_month_date to date
# Reduce the timestamp to a plain calendar date.
mt_df_final["revenue_month_date"] = pd.to_datetime(
    mt_df_final["revenue_month_date"]
).dt.date

In [148]:
## define table schema
# (column name, BigQuery type) pairs, expanded below into the list of
# {"name": ..., "type": ...} dicts that is passed as table_schema.
_schema_fields = [
    ("transaction_id", "STRING"),
    ("zuid", "INTEGER"),
    ("payment_method", "STRING"),
    ("sku", "STRING"),
    ("type_of_transaction", "STRING"),
    ("transaction_date", "TIMESTAMP"),
    ("country_name", "STRING"),
    ("total_booking_net_CHF", "FLOAT"),
    ("total_booking_net_EUR", "FLOAT"),
    ("term_start", "TIMESTAMP"),
    ("term_end", "TIMESTAMP"),
    ("product_term_length", "INTEGER"),
    ("product_term_length_months", "INTEGER"),
    ("product_group_finance", "STRING"),
    ("revenue_month_number", "INTEGER"),
    ("revenue_month_date", "DATE"),
    ("total_revenue_net_EUR", "FLOAT"),
    ("total_revenue_net_CHF", "FLOAT"),
    ("active_sub_content", "INTEGER"),
]
bq_schema = [
    {"name": field_name, "type": field_type}
    for field_name, field_type in _schema_fields
]

In [150]:
## export to BQ table
# Writes mt_df_final to temp.pypayment_v2_022022, replacing the table if it
# already exists. The project comes from the shared `project_id` variable
# instead of a second copy of the literal, so the read and the write cannot
# drift apart.
pandas_gbq.to_gbq(
    dataframe=mt_df_final,
    destination_table="temp.pypayment_v2_022022",
    project_id=project_id,
    if_exists="replace",
    progress_bar=None,
    table_schema=bq_schema,
    credentials=credentials
)

In [151]:
## export to csv
# Target directory and file name are joined into one path before writing; the
# frame's index is written as the first CSV column (to_csv default).
path = r"/Users/miguelcouto/Desktop/"
csv_target = os.path.join(path, r"pypayment_v2_022022.csv")

mt_df_final.to_csv(csv_target)

for status_line in ("-----------", "JOB FINISHED"):
    print(status_line)

-----------
JOB FINISHED
