In [63]:
import pandas as pd
import gspread
from gspread_dataframe import get_as_dataframe # <--- Import this
from google.oauth2.service_account import Credentials
from gspread_dataframe import set_with_dataframe
from google.cloud import bigquery

In [64]:
# --- 1. CONFIGURATION ---
CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
SHEET_ID = '1hHV98ZAqng6ogy48iKDTP_tMVP8siwzgB8ybLSq40kU'
WORKSHEET_NAME = 'Transformed'

## READ DATA

In [65]:
# 1. Read the Parquet file
df = pd.read_parquet("daily_funding.parquet")
print(f"Successfully read parquet file with {len(df)} rows.")

# 2. String Normalization
df["brand"] = df["brand"].astype(str).str.upper().str.strip()

# 3. Ensure Dates are standard Datetime objects (Fixes filtering issues)
# Even if parquet stores them correctly, this ensures they are datetime64[ns]
# which allows for direct comparison like: df['Date'] >= '2025-11-01'
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])

Successfully read parquet file with 1333487 rows.


In [66]:
df["Max_InsertedAt"].max()

Timestamp('2026-02-02 14:33:43.106628+0000', tz='UTC')

In [67]:
client = bigquery.Client()

# ... [Keep your existing SQL query exactly as is] ...
sql_query = """
SELECT
  Brand AS brand,
  Sub_group AS sub_group,
  CASE
    WHEN Whitelabel = 'KZ' THEN 'KZO'
    ELSE 'KZP'
  END AS whitelabel
FROM `kz-dp-prod.MAPPING.brand_whitelabel_country_folderid_mapping_tbl`;
"""

dim_brand_v2 = client.query(sql_query).to_dataframe()



In [68]:
dim_brand_v2["brand"] = dim_brand_v2["brand"].str.strip().str.upper()

In [69]:
# dim_brand_v2 = pd.read_csv("mapping_brand_v2.csv") 
# dim_brand_v2 = dim_brand_v2[["brand","sub_group", "whitelabel"]]

In [70]:
dim_brand_v2.columns = ["brand", "account_group", "group_re"]

In [71]:
# dim_brand_v2.loc[dim_brand_v2["group_re"] == 'KZ', 'group_re'] = 'KZO'
# dim_brand_v2.loc[dim_brand_v2["group_re"] != 'KZO', 'group_re'] = 'KZP'
# dim_brand_v2.loc[(dim_brand_v2["group_re"] != 'KZP') & (dim_brand_v2["group_re"] != 'KZo'), 'group_re'] = 'KZO'

In [72]:
dim_brand_v2["brand"] = dim_brand_v2["brand"].str.upper().str.strip()

In [73]:
df[((df["status"] == "completed") | (df["status"] == "timeout") | (df["status"] == "error")) & (df["Country"] == "BD")]

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,account_group,group_re,Country,status,...,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand


In [74]:
df = df[[i for i in df.columns if i not in ["group_re", "account_group"]]].merge(dim_brand_v2, on = "brand", how = "left")

In [75]:
df_deposit = df[df['type'] == 'DEPOSIT']

In [76]:
df_deposit

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re
0,2025-12-23,toppay-mx,toppay-mx/bank-transfer,bank-transfer,DEPOSIT,MXN,Mexico,completed,15:00 - 15:59,41,...,12,29,9,0,3,toppay-mx,bank,MXWOW,KZG1,KZO
2,2026-01-15,gxppay-pk,gxppay-pk/easypaisa,easypaisa,DEPOSIT,PKR,Pakistan,timeout,14:00 - 14:59,35,...,0,0,0,0,0,gxppay-pk,easypaisa,WOWPK,KZG1,KZO
3,2025-12-18,mmpay,mmpay/gcash-direct,gcash-direct,DEPOSIT,PHP,Philippines,completed,11:00 - 11:59,129,...,17,112,14,1,2,mmpay,gcash,WINMAYA,PHBLG1,KZP
4,2025-12-29,eastpay,eastpay/clabe,clabe,DEPOSIT,MXN,Mexico,completed,11:00 - 11:59,53,...,17,36,10,6,1,eastpay,clabe,MXWOW,KZG1,KZO
6,2025-11-28,mmpay,mmpay/gcash-direct,gcash-direct,DEPOSIT,PHP,Philippines,timeout,08:00 - 08:59,79,...,0,0,0,0,0,mmpay,gcash,WINMAYA,PHBLG1,KZP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1416674,2026-02-01,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,timeout,10:00 - 10:59,1,...,0,0,0,0,0,wepay,qr,MIX77,BLG1,KZP
1416675,2026-02-02,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,completed,11:00 - 11:59,1,...,0,1,0,0,0,wepay,qr,BEER78,KZG2,KZO
1416676,2026-01-31,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,10:00 - 10:59,1,...,0,0,0,0,0,wepay,qr,XOX66,WDB1,KZP
1416677,2026-01-28,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,timeout,14:00 - 14:59,1,...,0,0,0,0,0,wepay,qr,BIG188,KZG2,KZO


In [77]:
df_withdraw = df[df['type'] == 'WITHDRAW']

## DEPOSIT DATA

In [78]:
df_deposit = df_deposit[df_deposit["Date"] >= '2025-11-01']

In [79]:
df_deposit

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re
0,2025-12-23,toppay-mx,toppay-mx/bank-transfer,bank-transfer,DEPOSIT,MXN,Mexico,completed,15:00 - 15:59,41,...,12,29,9,0,3,toppay-mx,bank,MXWOW,KZG1,KZO
2,2026-01-15,gxppay-pk,gxppay-pk/easypaisa,easypaisa,DEPOSIT,PKR,Pakistan,timeout,14:00 - 14:59,35,...,0,0,0,0,0,gxppay-pk,easypaisa,WOWPK,KZG1,KZO
3,2025-12-18,mmpay,mmpay/gcash-direct,gcash-direct,DEPOSIT,PHP,Philippines,completed,11:00 - 11:59,129,...,17,112,14,1,2,mmpay,gcash,WINMAYA,PHBLG1,KZP
4,2025-12-29,eastpay,eastpay/clabe,clabe,DEPOSIT,MXN,Mexico,completed,11:00 - 11:59,53,...,17,36,10,6,1,eastpay,clabe,MXWOW,KZG1,KZO
6,2025-11-28,mmpay,mmpay/gcash-direct,gcash-direct,DEPOSIT,PHP,Philippines,timeout,08:00 - 08:59,79,...,0,0,0,0,0,mmpay,gcash,WINMAYA,PHBLG1,KZP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1416674,2026-02-01,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,timeout,10:00 - 10:59,1,...,0,0,0,0,0,wepay,qr,MIX77,BLG1,KZP
1416675,2026-02-02,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,completed,11:00 - 11:59,1,...,0,1,0,0,0,wepay,qr,BEER78,KZG2,KZO
1416676,2026-01-31,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,10:00 - 10:59,1,...,0,0,0,0,0,wepay,qr,XOX66,WDB1,KZP
1416677,2026-01-28,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,timeout,14:00 - 14:59,1,...,0,0,0,0,0,wepay,qr,BIG188,KZG2,KZO


In [80]:
# import pandas as pd
# import gspread
# from gspread_dataframe import set_with_dataframe
# from google.oauth2.service_account import Credentials

# # --- 1. CONFIGURATION ---
# CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
# SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
# WORKSHEET_NAME = 'Deposit Data'

# def write_df_to_gsheet(dataframe):
#     # --- 2. AUTHENTICATION ---
#     # Define the scope (permissions) required
#     scopes = [
#         'https://www.googleapis.com/auth/spreadsheets',
#         'https://www.googleapis.com/auth/drive'
#     ]
    
#     # Authenticate using the JSON key file
#     credentials = Credentials.from_service_account_file(
#         CREDENTIALS_FILE, 
#         scopes=scopes
#     )
#     gc = gspread.authorize(credentials)

#     # --- 3. CONNECT TO SHEET ---
#     print(f"Opening sheet ID: {SHEET_ID}...")
#     sh = gc.open_by_key(SHEET_ID)
    
#     try:
#         worksheet = sh.worksheet(WORKSHEET_NAME)
#     except gspread.WorksheetNotFound:
#         print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
#         worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

#     # --- 4. WRITE DATA ---
#     print("Clearing old data...")
#     worksheet.clear()  # Removes old data so rows don't overlap
    
#     print(f"Writing {len(dataframe)} rows to Google Sheets...")
#     # 'set_with_dataframe' handles headers and NaN values automatically
#     set_with_dataframe(worksheet, dataframe)
    
#     print("Done!")

# # --- Usage ---
# if __name__ == "__main__":
#     # Assuming 'df' exists from your previous code
#     # If not, uncomment the line below to test with dummy data:
#     # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
#     write_df_to_gsheet(df_deposit)

In [81]:
df_deposit.loc[df_deposit["status"] == "completed", "Count Success"] = df_deposit["Count"]

df_deposit.loc[df_deposit["status"].str.contains("error", na=False), "Count Error"] = df_deposit["Count"]

df_deposit.loc[df_deposit["status"].str.contains("timeout", na=False), "Count Timeout"] = df_deposit["Count"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_deposit.loc[df_deposit["status"] == "completed", "Count Success"] = df_deposit["Count"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_deposit.loc[df_deposit["status"].str.contains("error", na=False), "Count Error"] = df_deposit["Count"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_depos

In [82]:
df_deposit.loc[df_deposit["status"].str.contains("error", na=False)].shape[0] + df_deposit.loc[df_deposit["status"].str.contains("timeout", na=False)].shape[0] + df_deposit.loc[df_deposit["status"] == "completed", "Count Success"].shape[0]

1048492

In [83]:
df_deposit["Count"].sum()


np.int64(30029283)

In [84]:
# 1. Define your columns
group_cols = ["Date", "providerName", "channel_type", "type", "Country", "account_group", "group_re", "Hour"]

sum_cols = [
    "Count", "Count Success", "Count Error", "Count Timeout",
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above'
]

# 2. Perform Group By and Sum
df_grouped_dep = df_deposit.groupby(group_cols)[sum_cols].sum().reset_index()

# 3. (Optional) Inspect the result
print(df_grouped_dep.head())

        Date providerName channel_type     type   Country account_group  \
0 2025-11-01        aipay      qr-code  DEPOSIT  Thailand          96G1   
1 2025-11-01        aipay      qr-code  DEPOSIT  Thailand          96G1   
2 2025-11-01        aipay      qr-code  DEPOSIT  Thailand          96G1   
3 2025-11-01        aipay      qr-code  DEPOSIT  Thailand          96G1   
4 2025-11-01        aipay      qr-code  DEPOSIT  Thailand          KZG1   

  group_re           Hour  Count  Count Success  Count Error  Count Timeout  \
0      KZP  13:00 - 13:59    701            490            0            211   
1      KZP  14:00 - 14:59   1082            753            0            329   
2      KZP  15:00 - 15:59     74             46            0             28   
3      KZP  16:00 - 16:59    109             77            0             32   
4      KZO  12:00 - 12:59     30             21            1              8   

   Total_Net_Amount  winsorized_total_time_seconds  Count_01m30s_Below  \


In [85]:
import pandas as pd
from datetime import timedelta

# --- 1. PREPARE DATA & INDICES ---
# Assumes df_grouped has [Date, Hour, providerName, etc., Count, NetAmount...]

df_grouped_dep['Date'] = pd.to_datetime(df_grouped_dep['Date'])
df_grouped_dep['Week_Index'] = df_grouped_dep['Date'].dt.isocalendar().week.astype(int)

# Helper: Day of Week (0=Mon, 6=Sun)
df_grouped_dep['DayOfWeek'] = df_grouped_dep['Date'].dt.dayofweek

# --- 2. DETERMINE THE CUTOFF (Based on Current Week) ---
print("Identifying Cutoff from Current Week...")

# Identify the Current Week (Highest Index)
current_week_idx = df_grouped_dep['Week_Index'].max()
current_week_data = df_grouped_dep[df_grouped_dep['Week_Index'] == current_week_idx]

if not current_week_data.empty:
    # 1. How far into the week are we? (e.g., Today is Monday = 0)
    cutoff_day_idx = current_week_data['DayOfWeek'].max()
    
    # 2. What is the latest hour on that specific day?
    cutoff_hour = current_week_data.loc[
        current_week_data['DayOfWeek'] == cutoff_day_idx, 'Hour'
    ].max()
    
    print(f" -> Cutoff Point: Day {cutoff_day_idx} (Mon=0) at {cutoff_hour}")
else:
    # Fallback if current week is empty
    cutoff_day_idx = 6 # Sunday
    cutoff_hour = "23:59 - 23:59"
    print(" -> No current data. defaulting to full week.")

# --- 3. CREATE THE 'MARK' COLUMN ---
print("Tagging rows with 'pacing_mark'...")

def get_pacing_mark(row):
    # Rule 1: Always include the current week (it is the reference)
    if row['Week_Index'] == current_week_idx:
        return 'Included'
    
    # Rule 2: For historical weeks, compare Day & Hour
    # A. If the day is EARLIER in the week (e.g. Current is Wed, Row is Mon) -> Keep
    if row['DayOfWeek'] < cutoff_day_idx:
        return 'Included'
    
    # B. If it's the SAME Day, check the Hour
    if row['DayOfWeek'] == cutoff_day_idx:
        if row['Hour'] <= cutoff_hour:
            return 'Included'
        else:
            return 'Excluded' # Same day, but later hour
            
    # C. If the day is LATER in the week (e.g. Current is Mon, Row is Tue) -> Exclude
    if row['DayOfWeek'] > cutoff_day_idx:
        return 'Excluded'

    return 'Excluded'

# Apply the logic
df_grouped_dep['pacing_mark'] = df_grouped_dep.apply(get_pacing_mark, axis=1)

# --- 4. GENERATE LABELS ---
print("Generating Week Labels...")
# Week start (Monday)
df_grouped_dep['Week_Start'] = df_grouped_dep['Date'] - pd.to_timedelta(df_grouped_dep['Date'].dt.dayofweek, unit='D')

# Week end (Sunday)
df_grouped_dep['Week_End'] = df_grouped_dep['Week_Start'] + pd.to_timedelta(6, unit='D')

# Use Week_End for label
df_grouped_dep['Week_Label'] = (
    df_grouped_dep['Week_End'].dt.day.astype(str) + " " +
    df_grouped_dep['Week_End'].dt.strftime('%b %y')
)

# --- 5. AGGREGATE (INCLUDE 'pacing_mark' IN GROUP BY) ---
print("Aggregating...")

final_group_cols = [
    "Week_Label", 
    "Week_Index", 
    "pacing_mark", # <--- CRITICAL: Keep this so you can filter in Looker
    "providerName", "channel_type", "type", "Country", "account_group", "group_re"
]
sum_cols = [
    "Count", "Count Success", "Count Error", "Count Timeout",
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above'
]

df_final_dep = df_grouped_dep.groupby(final_group_cols)[sum_cols].sum().reset_index()

# Sort DESC so Week 49 is top
df_final_dep = df_final_dep.sort_values(by=['Week_Index', 'pacing_mark'], ascending=[False, True])

# print(df_final[['Week_Label', 'pacing_mark', 'Count']].head())

# write_df_to_gsheet(df_final)



Identifying Cutoff from Current Week...
 -> Cutoff Point: Day 6 (Mon=0) at 23:00 - 23:59
Tagging rows with 'pacing_mark'...
Generating Week Labels...
Aggregating...


In [86]:
df_final_dep.loc[df_final_dep["Week_Label"].str.contains("26"), "Week_Index"] += 52

In [87]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,Count,Count Success,Count Error,Count Timeout,Total_Net_Amount,winsorized_total_time_seconds,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1618,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,979,27,300,1074492.00,77288.0,771,101,59,48
1619,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZO,6795,5183,107,1505,2549155.39,600258.0,2480,1274,1019,410
1620,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,850,16,356,434389.00,110474.0,374,210,168,98
1621,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,KZG2,KZO,6413,4899,206,1308,1987161.60,523034.0,2598,1121,853,327
1622,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,1206,44,391,599058.00,129771.0,606,303,221,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2171,4 Jan 26,53,Included,xqpay-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,6404,4562,21,1821,2455661.00,315185.0,3831,419,202,110
2172,4 Jan 26,53,Included,xqpay-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,243,120,0,123,76925.00,17051.0,56,39,11,14
2173,4 Jan 26,53,Included,ydpay-mx,clabe,DEPOSIT,Mexico,KZG1,KZO,446,338,3,105,27961.67,45984.0,137,64,58,79
2174,4 Jan 26,53,Included,yzpay-th,bank-transfer,DEPOSIT,Thailand,BLG1,KZP,192,84,0,108,49513.07,38242.0,0,10,7,67


In [88]:
df_final_dep.columns = ['Week_Label', 'Week_Index', 'pacing_mark','providerName', 'channel_type', 'type', 'Country', 'account_group', "group_re",
        'DP Count', "Count Success", "Count Error", "Count Timeout", 'Total Amount DP',
       'Avg DP Time',     'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above']

# cond1 = df_final_dep["Week_Index"] == (df_final_dep["Week_Index"].max() - 1)
# cond2 = df_final_dep["pacing_mark"] == "Excluded"
# df_final_dep.loc[~(cond1 & cond2), "pacing_mark"] = None

df_final_dep["Week_Index"] = df_final_dep["Week_Index"] - df_final_dep["Week_Index"].max()

# df_final_dep = df_final_dep[df_final_dep["Week_Index"] != 0 ]
# df_final_dep["Week_Index"] = df_final_dep["Week_Index"] + 1
# df_final_dep = df_final_dep[df_final_dep["Week_Index"] >= -5]

In [89]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,Count Error,Count Timeout,Total Amount DP,Avg DP Time,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1618,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,979,27,300,1074492.00,77288.0,771,101,59,48
1619,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZO,6795,5183,107,1505,2549155.39,600258.0,2480,1274,1019,410
1620,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,850,16,356,434389.00,110474.0,374,210,168,98
1621,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,KZG2,KZO,6413,4899,206,1308,1987161.60,523034.0,2598,1121,853,327
1622,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,1206,44,391,599058.00,129771.0,606,303,221,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2171,4 Jan 26,-5,Included,xqpay-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,6404,4562,21,1821,2455661.00,315185.0,3831,419,202,110
2172,4 Jan 26,-5,Included,xqpay-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,243,120,0,123,76925.00,17051.0,56,39,11,14
2173,4 Jan 26,-5,Included,ydpay-mx,clabe,DEPOSIT,Mexico,KZG1,KZO,446,338,3,105,27961.67,45984.0,137,64,58,79
2174,4 Jan 26,-5,Included,yzpay-th,bank-transfer,DEPOSIT,Thailand,BLG1,KZP,192,84,0,108,49513.07,38242.0,0,10,7,67


## WITHDRAWL DATA

In [90]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,Count Error,Count Timeout,Total Amount DP,Avg DP Time,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1618,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,979,27,300,1074492.00,77288.0,771,101,59,48
1619,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZO,6795,5183,107,1505,2549155.39,600258.0,2480,1274,1019,410
1620,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,850,16,356,434389.00,110474.0,374,210,168,98
1621,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,KZG2,KZO,6413,4899,206,1308,1987161.60,523034.0,2598,1121,853,327
1622,28 Dec 25,-6,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,1206,44,391,599058.00,129771.0,606,303,221,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2171,4 Jan 26,-5,Included,xqpay-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,6404,4562,21,1821,2455661.00,315185.0,3831,419,202,110
2172,4 Jan 26,-5,Included,xqpay-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,243,120,0,123,76925.00,17051.0,56,39,11,14
2173,4 Jan 26,-5,Included,ydpay-mx,clabe,DEPOSIT,Mexico,KZG1,KZO,446,338,3,105,27961.67,45984.0,137,64,58,79
2174,4 Jan 26,-5,Included,yzpay-th,bank-transfer,DEPOSIT,Thailand,BLG1,KZP,192,84,0,108,49513.07,38242.0,0,10,7,67


In [91]:
# import pandas as pd
# import gspread
# from gspread_dataframe import set_with_dataframe
# from google.oauth2.service_account import Credentials

# # --- 1. CONFIGURATION ---
# CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
# SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
# WORKSHEET_NAME = 'Withdrawal Data'

# def write_df_to_gsheet(dataframe):
#     # --- 2. AUTHENTICATION ---
#     # Define the scope (permissions) required
#     scopes = [
#         'https://www.googleapis.com/auth/spreadsheets',
#         'https://www.googleapis.com/auth/drive'
#     ]
    
#     # Authenticate using the JSON key file
#     credentials = Credentials.from_service_account_file(
#         CREDENTIALS_FILE, 
#         scopes=scopes
#     )
#     gc = gspread.authorize(credentials)

#     # --- 3. CONNECT TO SHEET ---
#     print(f"Opening sheet ID: {SHEET_ID}...")
#     sh = gc.open_by_key(SHEET_ID)
    
#     try:
#         worksheet = sh.worksheet(WORKSHEET_NAME)
#     except gspread.WorksheetNotFound:
#         print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
#         worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

#     # --- 4. WRITE DATA ---
#     print("Clearing old data...")
#     worksheet.clear()  # Removes old data so rows don't overlap
    
#     print(f"Writing {len(dataframe)} rows to Google Sheets...")
#     # 'set_with_dataframe' handles headers and NaN values automatically
#     set_with_dataframe(worksheet, dataframe)
    
#     print("Done!")

# # --- Usage ---
# if __name__ == "__main__":
#     # Assuming 'df' exists from your previous code
#     # If not, uncomment the line below to test with dummy data:
#     # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
#     write_df_to_gsheet(df_withdraw)

In [92]:
df_withdraw[["Date", "providerName", "channel_type", "type" , "Country", "account_group"]]

Unnamed: 0,Date,providerName,channel_type,type,Country,account_group
1,2026-01-11,worldpay,nagad,WITHDRAW,Bangladesh,KZG1
5,2025-12-09,rmpay,paymaya,WITHDRAW,Philippines,PHBLG1
8,2026-01-07,all2pay-pk,easypaisa,WITHDRAW,Pakistan,KZG1
18,2025-11-08,aipay,bank-transfer,WITHDRAW,Thailand,KZG1
21,2026-01-19,dippay,bank-transfer,WITHDRAW,Thailand,KZG1
...,...,...,...,...,...,...
1415319,2026-01-28,tppopenai,gcash,WITHDRAW,Philippines,KZPHG1
1415322,2026-01-30,tppopenai,gcash,WITHDRAW,Philippines,KZPHG1
1415326,2026-01-31,tppopenai,gcash,WITHDRAW,Philippines,KZPHG1
1415328,2026-01-31,tppopenai,gcash,WITHDRAW,Philippines,KZPHG1


In [93]:
df_withdraw = df_withdraw[df_withdraw["Date"] >= "2025-11-01"]

In [94]:
df_withdraw[["Count", "Total_Net_Amount", "winsorized_total_time_seconds", "Count_03m00s_Below", "Count_03m31s_to_05m00s", "Count_05m00s_to_10m00s", 'Count_10m00s_Above']]

Unnamed: 0,Count,Total_Net_Amount,winsorized_total_time_seconds,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
1,13,19768.0,480.0,13,0,0,0
5,6,1919.0,300.0,6,0,0,0
8,1,2400.0,25.0,1,0,0,0
18,5,8313.0,747.0,4,0,1,0
21,4,4708.0,1258.0,1,1,2,0
...,...,...,...,...,...,...,...
1415319,1,1500.0,47.0,1,0,0,0
1415322,2,6701.0,130.0,2,0,0,0
1415326,1,1500.0,51.0,1,0,0,0
1415328,1,2050.0,52.0,1,0,0,0


In [95]:
# 1. Define your columns
group_cols = ["Date", "providerName", "channel_type", "type", "Country", "account_group", "group_re", "Hour"]

sum_cols = [
    "Count", 
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    "Count_03m00s_Below", 
    "Count_03m31s_to_05m00s", 
    "Count_05m00s_to_10m00s", 
    "Count_10m00s_Above"
]

# 2. Perform Group By and Sum
df_grouped = df_withdraw.groupby(group_cols)[sum_cols].sum().reset_index()

# 3. (Optional) Inspect the result
print(df_grouped.head())

        Date providerName   channel_type      type   Country account_group  \
0 2025-11-01        aipay  bank-transfer  WITHDRAW  Thailand          96G1   
1 2025-11-01        aipay  bank-transfer  WITHDRAW  Thailand          96G1   
2 2025-11-01        aipay  bank-transfer  WITHDRAW  Thailand          96G1   
3 2025-11-01        aipay  bank-transfer  WITHDRAW  Thailand          96G1   
4 2025-11-01        aipay  bank-transfer  WITHDRAW  Thailand          96G1   

  group_re           Hour  Count  Total_Net_Amount  \
0      KZP  14:00 - 14:59      5           57061.0   
1      KZP  15:00 - 15:59     23          115236.0   
2      KZP  16:00 - 16:59     68          298626.0   
3      KZP  17:00 - 17:59     42          112684.0   
4      KZP  18:00 - 18:59     10           21552.0   

   winsorized_total_time_seconds  Count_03m00s_Below  Count_03m31s_to_05m00s  \
0                         1631.0                   0                       2   
1                         3550.0              

In [96]:
import pandas as pd
from datetime import timedelta

# --- 1. PREPARE DATA & INDICES ---
# Assumes df_grouped has [Date, Hour, providerName, etc., Count, NetAmount...]

print("Calculating Week Indices...")
df_grouped['Date'] = pd.to_datetime(df_grouped['Date'])
df_grouped['Week_Index'] = df_grouped['Date'].dt.isocalendar().week.astype(int)

# Helper: Day of Week (0=Mon, 6=Sun)
df_grouped['DayOfWeek'] = df_grouped['Date'].dt.dayofweek

# --- 2. DETERMINE THE CUTOFF (Based on Current Week) ---
print("Identifying Cutoff from Current Week...")

# Identify the Current Week (Highest Index)
current_week_idx = df_grouped['Week_Index'].max()
current_week_data = df_grouped[df_grouped['Week_Index'] == current_week_idx]

if not current_week_data.empty:
    # 1. How far into the week are we? (e.g., Today is Monday = 0)
    cutoff_day_idx = current_week_data['DayOfWeek'].max()
    
    # 2. What is the latest hour on that specific day?
    cutoff_hour = current_week_data.loc[
        current_week_data['DayOfWeek'] == cutoff_day_idx, 'Hour'
    ].max()
    
    print(f" -> Cutoff Point: Day {cutoff_day_idx} (Mon=0) at {cutoff_hour}")
else:
    # Fallback if current week is empty
    cutoff_day_idx = 6 # Sunday
    cutoff_hour = "23:59 - 23:59"
    print(" -> No current data. defaulting to full week.")

# --- 3. CREATE THE 'MARK' COLUMN ---
print("Tagging rows with 'pacing_mark'...")

def get_pacing_mark(row):
    # Rule 1: Always include the current week (it is the reference)
    if row['Week_Index'] == current_week_idx:
        return 'Included'
    
    # Rule 2: For historical weeks, compare Day & Hour
    # A. If the day is EARLIER in the week (e.g. Current is Wed, Row is Mon) -> Keep
    if row['DayOfWeek'] < cutoff_day_idx:
        return 'Included'
    
    # B. If it's the SAME Day, check the Hour
    if row['DayOfWeek'] == cutoff_day_idx:
        if row['Hour'] <= cutoff_hour:
            return 'Included'
        else:
            return 'Excluded' # Same day, but later hour
            
    # C. If the day is LATER in the week (e.g. Current is Mon, Row is Tue) -> Exclude
    if row['DayOfWeek'] > cutoff_day_idx:
        return 'Excluded'

    return 'Excluded'

# Apply the logic
df_grouped['pacing_mark'] = df_grouped.apply(get_pacing_mark, axis=1)

# --- 4. GENERATE LABELS ---
print("Generating Week Labels...")
# Week start (Monday)
df_grouped['Week_Start'] = df_grouped['Date'] - pd.to_timedelta(df_grouped['Date'].dt.dayofweek, unit='D')

# Week end (Sunday)
df_grouped['Week_End'] = df_grouped['Week_Start'] + pd.to_timedelta(6, unit='D')

# Use Week_End for label
df_grouped['Week_Label'] = (
    df_grouped['Week_End'].dt.day.astype(str) + " " +
    df_grouped['Week_End'].dt.strftime('%b %y')
)


# --- 5. AGGREGATE (INCLUDE 'pacing_mark' IN GROUP BY) ---
print("Aggregating...")

final_group_cols = [
    "Week_Label", 
    "Week_Index", 
    "pacing_mark", # <--- CRITICAL: Keep this so you can filter in Looker
    "providerName", "channel_type", "type", "Country", "account_group", "group_re"
]

sum_cols = [
    "Count", "Total_Net_Amount", "winsorized_total_time_seconds", 
    "Count_03m00s_Below", "Count_03m31s_to_05m00s", 
    "Count_05m00s_to_10m00s", "Count_10m00s_Above"
]

df_final = df_grouped.groupby(final_group_cols)[sum_cols].sum().reset_index()

# Sort DESC so Week 49 is top
df_final = df_final.sort_values(by=['Week_Index', 'pacing_mark'], ascending=[False, True])

print(df_final[['Week_Label', 'pacing_mark', 'Count']].head())

# write_df_to_gsheet(df_final)

Calculating Week Indices...
Identifying Cutoff from Current Week...
 -> Cutoff Point: Day 6 (Mon=0) at 23:00 - 23:59
Tagging rows with 'pacing_mark'...
Generating Week Labels...
Aggregating...
     Week_Label pacing_mark  Count
1100  28 Dec 25    Included    418
1101  28 Dec 25    Included    347
1102  28 Dec 25    Included     64
1103  28 Dec 25    Included    130
1104  28 Dec 25    Included     31


In [97]:
df_final.loc[df_final["Week_Label"].str.contains("26"), "Week_Index"] += 52

In [98]:
df_final.columns = ['Week_Label', 'Week_Index', 'pacing_mark','providerName', 'channel_type', 'type',
       'Country', 'account_group', 'group_re','WD Count', 'Total Amount WD',
       'Avg WD Time', 'Count_03m00s_Below',
       'Count_03m31s_to_05m00s', 'Count_05m00s_to_10m00s',
       'Count_10m00s_Above']
       
# cond1 = df_final["Week_Index"] == (df_final["Week_Index"].max() - 1)
# cond2 = df_final["pacing_mark"] == "Excluded"
# df_final.loc[~(cond1 & cond2), "pacing_mark"] = None

df_final["Week_Index"] = df_final["Week_Index"] - df_final["Week_Index"].max()

# df_final = df_final[df_final["Week_Index"] != 0 ]
# df_final["Week_Index"] = df_final["Week_Index"] + 1
# df_final = df_final[df_final["Week_Index"] >= -5]

In [99]:
df_final

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,WD Count,Total Amount WD,Avg WD Time,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
1100,28 Dec 25,-6,Included,aipay,bank-transfer,WITHDRAW,Thailand,96G1,KZP,418,990764.0,68219.0,334,3,47,34
1101,28 Dec 25,-6,Included,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZO,347,2512641.0,165229.0,216,14,32,85
1102,28 Dec 25,-6,Included,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZP,64,567532.0,35485.0,40,0,5,19
1103,28 Dec 25,-6,Included,aipay,bank-transfer,WITHDRAW,Thailand,KZG2,KZO,130,890719.0,53442.0,72,1,15,42
1104,28 Dec 25,-6,Included,aipay,bank-transfer,WITHDRAW,Thailand,Others,KZP,31,114278.0,11765.0,18,0,5,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1447,4 Jan 26,-5,Included,xpay-pk,easypaisa,WITHDRAW,Pakistan,KZG1,KZO,325,1071114.0,22511.0,311,5,5,4
1448,4 Jan 26,-5,Included,xqpay-bd,bkash,WITHDRAW,Bangladesh,KZG1,KZO,1185,1164546.0,70677.0,1166,5,8,6
1449,4 Jan 26,-5,Included,xqpay-bd,nagad,WITHDRAW,Bangladesh,KZG1,KZO,952,864338.0,48122.0,944,3,2,3
1450,4 Jan 26,-5,Included,xqpay-bd,rocket,WITHDRAW,Bangladesh,KZG1,KZO,12,6585.0,2600.0,6,2,3,1


In [100]:
df_total_final = pd.concat([df_final_dep, df_final], axis=0)

In [101]:
df_total_prev = df_total_final[df_total_final["Week_Index"] < 0]
df_total_prev["Week_Index"] = df_total_prev["Week_Index"] + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_total_prev["Week_Index"] = df_total_prev["Week_Index"] + 1


In [102]:
df_total_prev.columns = ['Week_Label', 'Week_Index', 'pacing_mark', 'providerName',
       'channel_type', 'type', 'Country', 'account_group', 'group_re','DP Count Prev', 'Count Success Prev',
       'Count Error Prev', 'Count Timeout Prev',
       'Total Amount DP Prev', 'Avg DP Time Prev', 'Count_01m30s_Below Prev',
       'Count_01m31s_to_02m00s Prev', 'Count_02m01s_to_03m00s Prev',
       'Count_03m00s_Above Prev', 'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
       'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
       'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev']

In [103]:
df_total_prev

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count Prev,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
1618,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,...,101,59,48,,,,,,,
1619,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZO,6795,...,1274,1019,410,,,,,,,
1620,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,...,210,168,98,,,,,,,
1621,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,Thailand,KZG2,KZO,6413,...,1121,853,327,,,,,,,
1622,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,...,303,221,76,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1447,4 Jan 26,-4,Included,xpay-pk,easypaisa,WITHDRAW,Pakistan,KZG1,KZO,,...,,,,325,1071114.0,22511.0,311,5,5,4
1448,4 Jan 26,-4,Included,xqpay-bd,bkash,WITHDRAW,Bangladesh,KZG1,KZO,,...,,,,1185,1164546.0,70677.0,1166,5,8,6
1449,4 Jan 26,-4,Included,xqpay-bd,nagad,WITHDRAW,Bangladesh,KZG1,KZO,,...,,,,952,864338.0,48122.0,944,3,2,3
1450,4 Jan 26,-4,Included,xqpay-bd,rocket,WITHDRAW,Bangladesh,KZG1,KZO,,...,,,,12,6585.0,2600.0,6,2,3,1


In [104]:
index_cols = [
    'Week_Label', 'Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', 'group_re'
]

metric_cols = [
    'DP Count',  "Count Success", "Count Error", "Count Timeout", 'Total Amount DP', 'Avg DP Time',
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
    'Count_02m01s_to_03m00s', 'Count_03m00s_Above',
    'WD Count', 'Total Amount WD', 'Avg WD Time',
    'Count_03m00s_Below', 'Count_03m31s_to_05m00s',
    'Count_05m00s_to_10m00s', 'Count_10m00s_Above'
]

df_grouped_final = (
    df_total_final
    .groupby(index_cols)[metric_cols]
    .sum()
    .reset_index()
)

In [105]:
df_grouped_final = df_grouped_final.sort_values(by=['Week_Index', 'type'], ascending=[False, True])

In [106]:
index_cols = [
    'Week_Label', 'Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', "group_re"
]

metric_cols_prev = [
    'DP Count Prev',  "Count Success Prev", "Count Error Prev", "Count Timeout Prev", 'Total Amount DP Prev', 'Avg DP Time Prev',
    'Count_01m30s_Below Prev', 'Count_01m31s_to_02m00s Prev',
    'Count_02m01s_to_03m00s Prev', 'Count_03m00s_Above Prev',
    'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
    'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
    'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev'
]

df_grouped_prev = (
    df_total_prev
    .groupby(index_cols)[metric_cols_prev]
    .sum()
    .reset_index()
)

In [107]:
df_grouped_final

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,...,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above,WD Count,Total Amount WD,Avg WD Time,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
3922,8 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZO,284,192,...,54,35,10,0,0.0,0.0,0,0,0,0
3923,8 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,43,26,...,5,2,2,0,0.0,0.0,0,0,0,0
3924,8 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,KZG2,KZO,491,387,...,132,63,10,0,0.0,0.0,0,0,0,0
3925,8 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,132,100,...,33,17,3,0,0.0,0.0,0,0,0,0
3926,8 Feb 26,0,all2pay-pk,easypaisa,DEPOSIT,Pakistan,KZG1,KZO,570,134,...,23,13,16,0,0.0,0.0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1777,2 Nov 25,-14,wingpay,paymaya,WITHDRAW,Philippines,PH96G1,KZP,0,0,...,0,0,0,34,43134.0,1914.0,33,1,0,0
1778,2 Nov 25,-14,wingpay,paymaya,WITHDRAW,Philippines,PHBLG1,KZP,0,0,...,0,0,0,4,603.0,766.0,2,1,1,0
1783,2 Nov 25,-14,xqpay-bd,bkash,WITHDRAW,Bangladesh,KZG1,KZO,0,0,...,0,0,0,271,436981.0,10801.0,268,3,0,0
1785,2 Nov 25,-14,xqpay-bd,nagad,WITHDRAW,Bangladesh,KZG1,KZO,0,0,...,0,0,0,196,245447.0,8744.0,193,1,1,1


In [108]:
df_grouped_prev

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count Prev,Count Success Prev,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
0,1 Feb 26,0,aipay,bank-transfer,WITHDRAW,Thailand,96G1,KZP,0,0,...,0,0,0,34,544192.0,3896.0,31,1,1,1
1,1 Feb 26,0,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZO,0,0,...,0,0,0,840,4103224.0,212921.0,665,71,45,59
2,1 Feb 26,0,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZP,0,0,...,0,0,0,121,570803.0,29743.0,96,12,4,9
3,1 Feb 26,0,aipay,bank-transfer,WITHDRAW,Thailand,KZG2,KZO,0,0,...,0,0,0,481,2572528.0,179159.0,371,42,27,41
4,1 Feb 26,0,aipay,bank-transfer,WITHDRAW,Thailand,Others,KZP,0,0,...,0,0,0,118,667336.0,53750.0,94,9,4,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4207,9 Nov 25,-12,xqpay-bd,nagad,WITHDRAW,Bangladesh,KZG1,KZO,0,0,...,0,0,0,740,779886.0,38021.0,730,2,2,6
4208,9 Nov 25,-12,xqpay-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,312,189,...,40,58,23,0,0.0,0.0,0,0,0,0
4209,9 Nov 25,-12,xqpay-bd,rocket,WITHDRAW,Bangladesh,KZG1,KZO,0,0,...,0,0,0,31,31609.0,2155.0,28,1,2,0
4210,9 Nov 25,-12,zenpay,pix,DEPOSIT,Brazil,KZG1,KZO,82,65,...,4,4,0,0,0.0,0.0,0,0,0,0


In [109]:
df_grouped_concat = df_grouped_final.merge(df_grouped_prev[['Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', 'group_re','DP Count Prev',  "Count Success Prev", "Count Error Prev", "Count Timeout Prev",'Total Amount DP Prev', 'Avg DP Time Prev',
    'Count_01m30s_Below Prev', 'Count_01m31s_to_02m00s Prev',
    'Count_02m01s_to_03m00s Prev', 'Count_03m00s_Above Prev',
    'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
    'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
    'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev']], how = 'outer', on = ['Week_Index', 'providerName',
       'channel_type', 'type', 'Country', 'account_group', 'group_re'])

In [110]:
list_data = df_grouped_concat[df_grouped_concat["Week_Label"].notna()][["Week_Label", "Week_Index"]].drop_duplicates()

In [111]:
df_grouped_concat = df_grouped_concat[[i for i in df_grouped_concat.columns if "Week_Label" not in i]].merge(list_data,  how = "left")

In [112]:
df_grouped_concat["Week_Label"] = df_grouped_concat["Week_Label"].astype(str)

In [113]:
df_grouped_concat.groupby(['Week_Label', 'Week_Index']).agg({"WD Count" : "sum"
                                                             , "WD Count Prev": "sum"}).reset_index().sort_values(by=['Week_Index'], ascending=[False])

Unnamed: 0,Week_Label,Week_Index,WD Count,WD Count Prev
13,8 Feb 26,0,79316,593058
0,1 Feb 26,-1,593058,528664
8,25 Jan 26,-2,528664,525321
4,18 Jan 26,-3,525321,485822
1,11 Jan 26,-4,485822,504188
11,4 Jan 26,-5,504188,495814
9,28 Dec 25,-6,495814,432240
6,21 Dec 25,-7,432240,400276
2,14 Dec 25,-8,400276,433285
12,7 Dec 25,-9,433285,395136


In [114]:
df_grouped_concat = df_grouped_concat[["Week_Label"] + [i for i in df_grouped_concat.columns if i != "Week_Label"]]

In [115]:
df_grouped_concat = df_grouped_concat[df_grouped_concat["Week_Index"] >= -6]

In [116]:
df_grouped_concat["group_re"] = df_grouped_concat["group_re"].str.replace("KZG", "KZO")

In [117]:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
from google.oauth2.service_account import Credentials

# --- 1. CONFIGURATION ---
CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
WORKSHEET_NAME = 'WoW Data'

def write_df_to_gsheet(dataframe):
    # --- 2. AUTHENTICATION ---
    # Define the scope (permissions) required
    scopes = [
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive'
    ]
    
    # Authenticate using the JSON key file
    credentials = Credentials.from_service_account_file(
        CREDENTIALS_FILE, 
        scopes=scopes
    )
    gc = gspread.authorize(credentials)

    # --- 3. CONNECT TO SHEET ---
    print(f"Opening sheet ID: {SHEET_ID}...")
    sh = gc.open_by_key(SHEET_ID)
    
    try:
        worksheet = sh.worksheet(WORKSHEET_NAME)
    except gspread.WorksheetNotFound:
        print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
        worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

    # --- 4. WRITE DATA ---
    print("Clearing old data...")
    worksheet.clear()  # Removes old data so rows don't overlap
    
    print(f"Writing {len(dataframe)} rows to Google Sheets...")
    # 'set_with_dataframe' handles headers and NaN values automatically
    set_with_dataframe(worksheet, dataframe)
    
    print("Done!")

# --- Usage ---
if __name__ == "__main__":
    # Assuming 'df' exists from your previous code
    # If not, uncomment the line below to test with dummy data:
    # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
    write_df_to_gsheet(df_grouped_concat.sort_values(by=['Week_Index'], ascending=[False]))

Opening sheet ID: 1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4...
Clearing old data...
Writing 2483 rows to Google Sheets...
Done!


In [118]:
df_deposit[df_deposit["Country"] == "Bangladesh"]

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re,Count Success,Count Error,Count Timeout
10,2026-01-13,tgpay-bd,tgpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,completed,09:00 - 09:59,10,...,0,1,tgpay-bd,bkash,BDPOP,KZG1,KZO,10,,
11,2026-01-23,worldpay,worldpay/bkash,bkash,DEPOSIT,BDT,Bangladesh,timeout,16:00 - 16:59,8,...,0,0,worldpay,bkash,ADDA7,KZG1,KZO,,,8
13,2025-12-16,dapay-bd,dapay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,timeout,00:00 - 00:59,2,...,0,0,dapay-bd,bkash,ADDA7,KZG1,KZO,,,2
19,2026-01-10,worldpay,worldpay/nagad,nagad,DEPOSIT,BDT,Bangladesh,completed,22:00 - 22:59,68,...,0,1,worldpay,nagad,ADDA7,KZG1,KZO,68,,
27,2026-01-05,worldpay,worldpay/bkash,bkash,DEPOSIT,BDT,Bangladesh,completed,11:00 - 11:59,51,...,0,0,worldpay,bkash,ADDA7,KZG1,KZO,51,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407414,2026-01-31,tgpay-bd,tgpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,completed,15:00 - 15:59,1,...,0,0,tgpay-bd,bkash,BDPOP,KZG1,KZO,1,,
1407416,2026-01-28,tgpay-bd,tgpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,timeout,06:00 - 06:59,3,...,0,0,tgpay-bd,bkash,BDPOP,KZG1,KZO,,,3
1407417,2026-01-31,tgpay-bd,tgpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,timeout,06:00 - 06:59,1,...,0,0,tgpay-bd,bkash,BDPOP,KZG1,KZO,,,1
1407418,2026-02-01,tgpay-bd,tgpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,completed,06:00 - 06:59,2,...,0,0,tgpay-bd,bkash,BDPOP,KZG1,KZO,2,,


In [119]:
df_grouped_concat[(df_grouped_concat["type"] == "DEPOSIT") & 
                  (df_grouped_concat["Country"] == "Bangladesh") &
                  (df_grouped_concat["Week_Label"] == "4 Jan 26")]

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
2943,4 Jan 26,-5,bcatpay,bkash,DEPOSIT,Bangladesh,KZG1,KZO,124.0,34.0,...,9,1,6,0,0.0,0.0,0,0,0,0
2944,4 Jan 26,-5,bcatpay,nagad,DEPOSIT,Bangladesh,KZG1,KZO,55.0,28.0,...,6,1,0,0,0.0,0.0,0,0,0,0
2945,4 Jan 26,-5,bcatpay,rocket,DEPOSIT,Bangladesh,KZG1,KZO,72.0,17.0,...,2,1,1,0,0.0,0.0,0,0,0,0
2975,4 Jan 26,-5,dapay-bd,bkash,DEPOSIT,Bangladesh,KZG1,KZO,626.0,350.0,...,81,45,13,0,0.0,0.0,0,0,0,0
2977,4 Jan 26,-5,dapay-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,443.0,268.0,...,45,21,13,0,0.0,0.0,0,0,0,0
3001,4 Jan 26,-5,dpp-bd,bkash,DEPOSIT,Bangladesh,KZG1,KZO,,,...,20,5,4,0,0.0,0.0,0,0,0,0
3002,4 Jan 26,-5,dpp-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,,,...,6,3,2,0,0.0,0.0,0,0,0,0
3003,4 Jan 26,-5,dpp-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,,,...,0,2,0,0,0.0,0.0,0,0,0,0
3058,4 Jan 26,-5,gopay,bkash,DEPOSIT,Bangladesh,KZG1,KZO,346.0,158.0,...,12,4,1,0,0.0,0.0,0,0,0,0
3059,4 Jan 26,-5,gopay,nagad,DEPOSIT,Bangladesh,KZG1,KZO,241.0,120.0,...,4,5,4,0,0.0,0.0,0,0,0,0


In [120]:
df_grouped_concat

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
2555,28 Dec 25,-6,aipay,bank-transfer,WITHDRAW,Thailand,96G1,KZP,0,0,...,0,0,0,109,784944.0,29080.0,74,11,8,16
2556,28 Dec 25,-6,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZO,0,0,...,0,0,0,47,231339.0,2763.0,47,0,0,0
2557,28 Dec 25,-6,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZP,0,0,...,0,0,0,12,46266.0,744.0,12,0,0,0
2558,28 Dec 25,-6,aipay,bank-transfer,WITHDRAW,Thailand,KZG2,KZO,0,0,...,0,0,0,28,236995.0,14536.0,17,0,0,11
2559,28 Dec 25,-6,aipay,bank-transfer,WITHDRAW,Thailand,Others,KZP,0,0,...,0,0,0,10,89320.0,3571.0,7,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5033,8 Feb 26,0,ydpay-mx,clabe,DEPOSIT,Mexico,MXNKZG1,KZO,,,...,3,1,4,0,0.0,0.0,0,0,0,0
5034,8 Feb 26,0,ydpay-mx,clabe,WITHDRAW,Mexico,KZG1,KZO,,,...,0,0,0,1111,137993.0,162199.0,1065,33,6,7
5035,8 Feb 26,0,ydpay-mx,clabe,WITHDRAW,Mexico,MXNKZG1,KZO,,,...,0,0,0,3,316.0,460.0,3,0,0,0
5036,8 Feb 26,0,zenpay,pix,DEPOSIT,Brazil,KZG1,KZO,,,...,157,121,52,0,0.0,0.0,0,0,0,0
