In [117]:
import pandas as pd
import numpy as np
from itertools import product

In [118]:
start_month = "2023-08-01"

end_month = "2023-08-31"

dates = pd.date_range(
    start_month,
    end_month
)

country_list = [
    "KZ",
    "PT",
    "FR",
    "BR",
    "GR"
]

df_users_columns = {
    "customer_id": str,
    "registration_date": str,
    "first_deposit_date": str,
    "country": "category",
    "traffic_type": "category",
    "partner_id": str,
    "cost_usd": float
}

In [119]:
def read_users() -> pd.DataFrame:
    df_users = pd.read_csv(
        "users.csv",
        usecols = df_users_columns.keys(),
        dtype = df_users_columns,
        sep = ",",
        parse_dates = [
            "registration_date",
            "first_deposit_date"
        ]
    )

    return df_users


def create_spine(
        dates: list,
        country_list: list
) -> pd.DataFrame:
    data = product(country_list, dates)

    df_spine = pd.DataFrame(
        data,
        columns = [
            "country",
            "date"
        ]
    )

    return df_spine


def find_registration_dynamic_by_country(
        df_users: pd.DataFrame,
        df_spine: pd.DataFrame
) -> pd.DataFrame:
    df_users_grouped = df_users.groupby(
        [
            "country",
            "registration_date"
        ],
        as_index = False
    ).size()\
    .rename(
        columns = {
            "size": "new_registration_count",
            "registration_date": "date"
        }
    )

    df_spine = df_spine.merge(
        df_users_grouped,
        on = [
            "country",
            "date"
        ],
        how = "left"
    )

    df_spine["new_registration_count"] = df_spine["new_registration_count"].fillna(0.0)

    df_spine.to_csv(
        "registration_dynamic_by_country.csv",
        sep = ",",
        index = False
    )

    return df_spine


def find_first_deposits_dynamic_by_country(
        df_users: pd.DataFrame,
        df_spine: pd.DataFrame
) -> pd.DataFrame:
    df_users_grouped = df_users[
        df_users["first_deposit_date"].between(
            start_month,
            end_month
        )
    ].groupby(
        [
            "country",
            "first_deposit_date"
        ],
        as_index = False
    ).size()\
    .rename(
        columns = {
            "first_deposit_date": "date",
            "size": "first_deposit_count"
        }
    )

    df_spine = df_spine.merge(
        df_users_grouped,
        on = [
            "country",
            "date"
        ],
        how = "left"
    )

    df_spine["first_deposit_count"] = df_spine["first_deposit_count"].fillna(0.0)

    df_spine.to_csv(
        "first_deposits_dynamic_by_country.csv",
        index = False,
        sep = ","
    )
    
    return df_spine


def find_conversion_from_regist_to_deposit(
        df_users: pd.DataFrame,
        df_spine: pd.DataFrame
) -> pd.DataFrame:
    df_queryed_by_regist_date = df_users.groupby(
        [
            "registration_date",
            "country"
        ],
        as_index = False
    ).size()\
    .rename(
        columns = {
            "size": "new_customers_count",
            "registration_date": "date"
        }
    )

    df_queryed_by_regist_date_with_deposit = df_users[
        df_users["first_deposit_date"].between(
            start_month,
            end_month
        )    
    ].groupby(
        [
            "registration_date",
            "country"
        ],
        as_index = False,
    ).size()\
    .rename(
        columns = {
            "size": "new_customers_with_deposit_count",
            "registration_date": "date"
        }
    )

    df_spine = df_spine.merge(
        df_queryed_by_regist_date,
        on = [
            "date",
            "country"
        ],
        how = "left"
    ).merge(
        df_queryed_by_regist_date_with_deposit,
        on = [
            "date",
            "country"
        ],
        how = "left"
    )

    col_to_fill = [
        "new_customers_count",
        "new_customers_with_deposit_count"
    ]
    df_spine[col_to_fill] = df_spine[col_to_fill].fillna(0.0)
    

    df_spine["conversion, %"] = df_spine["new_customers_with_deposit_count"]\
    .div(df_spine["new_customers_count"])\
    .mul(100)\
    .round(3)

    df_spine.to_csv(
        "conversion_from_regist_to_deposit.csv",
        index = False,
        sep = ";"
    )

    return df_spine


def find_same_date_deposit(
        df_users: pd.DataFrame
) -> pd.DataFrame:
    df_group_total: pd.DataFrame = df_users.groupby(
        [
            "country"
        ],
        as_index = False
    ).size()\
    .rename(
        columns = {
            "size": "new_customers_total"
        }
    )
    
    df_group_total_same_date: pd.DataFrame = df_users[
        df_users["registration_date"].eq(
            df_users["first_deposit_date"]
        )
    ].groupby(
        [
            "country"
        ],
        as_index = False
    ).size()\
    .rename(
        columns = {
            "size": "new_customers_total_same_date"
        }
    )
    
    df_total = df_group_total.merge(
        df_group_total_same_date,
        on = [
            "country"
        ],
        how = "left"
    )

    df_total["same_date_deposit, %"] = df_total["new_customers_total_same_date"]\
    .div(df_total["new_customers_total"])\
    .mul(100)\
    .round(3)

    df_total.to_csv(
        "same_date_deposit.csv",
        index = False,
        sep = ";"
    )
    
    return df_total

In [120]:
df_users = read_users()

df_spine = create_spine(
    dates,
    country_list
)

df_users.head()

Unnamed: 0,customer_id,registration_date,first_deposit_date,country,traffic_type,partner_id,cost_usd
0,500000,2023-08-01,NaT,GR,paid,177.0,
1,500001,2023-08-01,NaT,BR,paid,143.0,
2,500002,2023-08-01,NaT,GR,organic,,
3,500003,2023-08-01,NaT,BR,paid,143.0,
4,500004,2023-08-01,NaT,PT,organic,,


# 1

In [121]:
df_registration_dynamic_by_country = find_registration_dynamic_by_country(
    df_users,
    df_spine
)

  df_users_grouped = df_users.groupby(


# 2

In [122]:
df_first_deposits_dynamic_by_country =  find_first_deposits_dynamic_by_country(
    df_users,
    df_spine
)

  ].groupby(


# 3

In [123]:
df_conversion_from_regist_to_deposit = find_conversion_from_regist_to_deposit(
    df_users,
    df_spine
)

  df_queryed_by_regist_date = df_users.groupby(
  ].groupby(


# 4

In [124]:
df_same_date_deposit = find_same_date_deposit(df_users)

  df_group_total: pd.DataFrame = df_users.groupby(
  ].groupby(
