In [3]:
import pandas as pd
import gspread
from gspread_dataframe import get_as_dataframe # <--- Import this
from google.oauth2.service_account import Credentials
from gspread_dataframe import set_with_dataframe
from google.cloud import bigquery

In [4]:
# --- 1. CONFIGURATION ---
CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
SHEET_ID = '1hHV98ZAqng6ogy48iKDTP_tMVP8siwzgB8ybLSq40kU'
WORKSHEET_NAME = 'Transformed'

## READ DATA

In [5]:
# 1. Read the Parquet file
df = pd.read_parquet("daily_funding.parquet")
print(f"Successfully read parquet file with {len(df)} rows.")

# 2. String Normalization
df["brand"] = df["brand"].astype(str).str.upper().str.strip()

# 3. Ensure Dates are standard Datetime objects (Fixes filtering issues)
# Even if parquet stores them correctly, this ensures they are datetime64[ns]
# which allows for direct comparison like: df['Date'] >= '2025-11-01'
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])

Successfully read parquet file with 1124703 rows.


In [6]:
df["Max_InsertedAt"].max()

Timestamp('2026-02-16 08:13:25.842827+0000', tz='UTC')

In [8]:
client = bigquery.Client(project = 'kz-dp-prod')

# ... [Keep your existing SQL query exactly as is] ...
sql_query = """
SELECT
  Brand AS brand,
  Sub_group AS sub_group,
  CASE
    WHEN Whitelabel = 'KZ' THEN 'KZO'
    ELSE 'KZP'
  END AS whitelabel
FROM `kz-dp-prod.MAPPING.brand_whitelabel_country_folderid_mapping_tbl`;
"""

dim_brand_v2 = client.query(sql_query).to_dataframe()



In [9]:
dim_brand_v2["brand"] = dim_brand_v2["brand"].str.strip().str.upper()

In [10]:
# dim_brand_v2 = pd.read_csv("mapping_brand_v2.csv") 
# dim_brand_v2 = dim_brand_v2[["brand","sub_group", "whitelabel"]]

In [11]:
dim_brand_v2.columns = ["brand", "account_group", "group_re"]

In [12]:
# dim_brand_v2.loc[dim_brand_v2["group_re"] == 'KZ', 'group_re'] = 'KZO'
# dim_brand_v2.loc[dim_brand_v2["group_re"] != 'KZO', 'group_re'] = 'KZP'
# dim_brand_v2.loc[(dim_brand_v2["group_re"] != 'KZP') & (dim_brand_v2["group_re"] != 'KZo'), 'group_re'] = 'KZO'

In [13]:
dim_brand_v2["brand"] = dim_brand_v2["brand"].str.upper().str.strip()

In [14]:
df[((df["status"] == "completed") | (df["status"] == "timeout") | (df["status"] == "error")) & (df["Country"] == "BD")]

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,account_group,group_re,Country,status,...,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand


In [15]:
df = df[[i for i in df.columns if i not in ["group_re", "account_group"]]].merge(dim_brand_v2, on = "brand", how = "left")

In [16]:
df_deposit = df[df['type'] == 'DEPOSIT']

In [17]:
df_deposit

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re
0,2026-01-01,wingpay-bd,wingpay-bd,bkash,DEPOSIT,BDT,Bangladesh,timeout,12:00 - 12:59,7,...,0,0,0,0,0,wingpay-bd,bkash,ADDA7,KZG1,KZO
2,2025-12-14,swpay,swpay/jazz-cash,jazz-cash,DEPOSIT,PKR,Pakistan,timeout,12:00 - 12:59,38,...,0,0,0,0,0,swpay,jazz,WOWPK,KZG1,KZO
3,2026-01-31,tarspay-mx,tarspay-mx/clabe,clabe,DEPOSIT,MXN,Mexico,completed,03:00 - 03:59,107,...,18,89,17,0,1,tarspay-mx,clabe,MXWOW,KZG1,KZO
4,2025-12-04,tarspay-pk,tarspay-pk/easypaisa,easypaisa,DEPOSIT,PKR,Pakistan,timeout,16:00 - 16:59,13,...,0,0,0,0,0,tarspay-pk,easypaisa,PKPOP,KZG1,KZO
7,2026-02-04,tgpay-bd,tgpay-bd/rocket,rocket,DEPOSIT,BDT,Bangladesh,completed,22:00 - 22:59,5,...,3,2,2,0,1,tgpay-bd,rocket,ADDA7,KZG1,KZO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1189957,2025-12-16,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,02:00 - 02:59,1,...,0,0,0,0,0,wepay,qr,XOX66,WDB1,KZP
1189958,2026-02-07,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,timeout,01:00 - 01:59,1,...,0,0,0,0,0,wepay,qr,JAI88,96G1,KZP
1189959,2026-02-04,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,08:00 - 08:59,1,...,0,0,0,0,0,wepay,qr,GO989,THKZG2,KZO
1189960,2026-02-04,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,08:00 - 08:59,1,...,0,0,0,0,0,wepay,qr,GO989,Others,KZP


In [18]:
df_withdraw = df[df['type'] == 'WITHDRAW']

## DEPOSIT DATA

In [19]:
df_deposit = df_deposit[df_deposit["Date"] >= '2025-11-01']

In [20]:
df_deposit

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re
0,2026-01-01,wingpay-bd,wingpay-bd,bkash,DEPOSIT,BDT,Bangladesh,timeout,12:00 - 12:59,7,...,0,0,0,0,0,wingpay-bd,bkash,ADDA7,KZG1,KZO
2,2025-12-14,swpay,swpay/jazz-cash,jazz-cash,DEPOSIT,PKR,Pakistan,timeout,12:00 - 12:59,38,...,0,0,0,0,0,swpay,jazz,WOWPK,KZG1,KZO
3,2026-01-31,tarspay-mx,tarspay-mx/clabe,clabe,DEPOSIT,MXN,Mexico,completed,03:00 - 03:59,107,...,18,89,17,0,1,tarspay-mx,clabe,MXWOW,KZG1,KZO
4,2025-12-04,tarspay-pk,tarspay-pk/easypaisa,easypaisa,DEPOSIT,PKR,Pakistan,timeout,16:00 - 16:59,13,...,0,0,0,0,0,tarspay-pk,easypaisa,PKPOP,KZG1,KZO
7,2026-02-04,tgpay-bd,tgpay-bd/rocket,rocket,DEPOSIT,BDT,Bangladesh,completed,22:00 - 22:59,5,...,3,2,2,0,1,tgpay-bd,rocket,ADDA7,KZG1,KZO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1189957,2025-12-16,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,02:00 - 02:59,1,...,0,0,0,0,0,wepay,qr,XOX66,WDB1,KZP
1189958,2026-02-07,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,timeout,01:00 - 01:59,1,...,0,0,0,0,0,wepay,qr,JAI88,96G1,KZP
1189959,2026-02-04,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,08:00 - 08:59,1,...,0,0,0,0,0,wepay,qr,GO989,THKZG2,KZO
1189960,2026-02-04,wepay,wepay/qr-code-native,qr-code-native,DEPOSIT,THB,Thailand,error,08:00 - 08:59,1,...,0,0,0,0,0,wepay,qr,GO989,Others,KZP


In [21]:
# import pandas as pd
# import gspread
# from gspread_dataframe import set_with_dataframe
# from google.oauth2.service_account import Credentials

# # --- 1. CONFIGURATION ---
# CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
# SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
# WORKSHEET_NAME = 'Deposit Data'

# def write_df_to_gsheet(dataframe):
#     # --- 2. AUTHENTICATION ---
#     # Define the scope (permissions) required
#     scopes = [
#         'https://www.googleapis.com/auth/spreadsheets',
#         'https://www.googleapis.com/auth/drive'
#     ]
    
#     # Authenticate using the JSON key file
#     credentials = Credentials.from_service_account_file(
#         CREDENTIALS_FILE, 
#         scopes=scopes
#     )
#     gc = gspread.authorize(credentials)

#     # --- 3. CONNECT TO SHEET ---
#     print(f"Opening sheet ID: {SHEET_ID}...")
#     sh = gc.open_by_key(SHEET_ID)
    
#     try:
#         worksheet = sh.worksheet(WORKSHEET_NAME)
#     except gspread.WorksheetNotFound:
#         print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
#         worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

#     # --- 4. WRITE DATA ---
#     print("Clearing old data...")
#     worksheet.clear()  # Removes old data so rows don't overlap
    
#     print(f"Writing {len(dataframe)} rows to Google Sheets...")
#     # 'set_with_dataframe' handles headers and NaN values automatically
#     set_with_dataframe(worksheet, dataframe)
    
#     print("Done!")

# # --- Usage ---
# if __name__ == "__main__":
#     # Assuming 'df' exists from your previous code
#     # If not, uncomment the line below to test with dummy data:
#     # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
#     write_df_to_gsheet(df_deposit)

In [22]:
df_deposit.loc[df_deposit["status"] == "completed", "Count Success"] = df_deposit["Count"]

df_deposit.loc[df_deposit["status"].str.contains("error", na=False), "Count Error"] = df_deposit["Count"]

df_deposit.loc[df_deposit["status"].str.contains("timeout", na=False), "Count Timeout"] = df_deposit["Count"]

In [23]:
df_deposit.loc[df_deposit["status"].str.contains("error", na=False)].shape[0] + df_deposit.loc[df_deposit["status"].str.contains("timeout", na=False)].shape[0] + df_deposit.loc[df_deposit["status"] == "completed", "Count Success"].shape[0]

872694

In [24]:
df_deposit["Count"].sum()


np.int64(27571650)

In [25]:
# 1. Define your columns
group_cols = ["Date", "providerName", "channel_type", "type", "Country", "account_group", "group_re", "Hour"]

sum_cols = [
    "Count", "Count Success", "Count Error", "Count Timeout",
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above'
]

# 2. Perform Group By and Sum
df_grouped_dep = df_deposit.groupby(group_cols)[sum_cols].sum().reset_index()

# 3. (Optional) Inspect the result
print(df_grouped_dep.head())

        Date providerName          channel_type     type   Country  \
0 2025-11-12    toppay-mx         bank-transfer  DEPOSIT    Mexico   
1 2025-11-13        hspay  bank-transfer-native  DEPOSIT  Thailand   
2 2025-11-20        hspay  bank-transfer-native  DEPOSIT  Thailand   
3 2025-11-26        hspay  bank-transfer-native  DEPOSIT  Thailand   
4 2025-11-27        hspay  bank-transfer-native  DEPOSIT  Thailand   

  account_group group_re           Hour  Count  Count Success  Count Error  \
0          KZG1      KZO  00:00 - 00:59      1              1            0   
1          WDB1      KZP  20:00 - 20:59      1              1            0   
2        THKZG2      KZO  07:00 - 07:59      1              1            0   
3        THKZG1      KZO  05:00 - 05:59      1              1            0   
4          BLG1      KZP  22:00 - 22:59      1              1            0   

   Count Timeout  Total_Net_Amount  winsorized_total_time_seconds  \
0              0              10.0       

In [26]:
import pandas as pd
from datetime import timedelta

# --- 1. PREPARE DATA & INDICES ---
# Assumes df_grouped has [Date, Hour, providerName, etc., Count, NetAmount...]

df_grouped_dep['Date'] = pd.to_datetime(df_grouped_dep['Date'])
df_grouped_dep['Week_Index'] = df_grouped_dep['Date'].dt.isocalendar().week.astype(int)

# Helper: Day of Week (0=Mon, 6=Sun)
df_grouped_dep['DayOfWeek'] = df_grouped_dep['Date'].dt.dayofweek

# --- 2. DETERMINE THE CUTOFF (Based on Current Week) ---
print("Identifying Cutoff from Current Week...")

# Identify the Current Week (Highest Index)
current_week_idx = df_grouped_dep['Week_Index'].max()
current_week_data = df_grouped_dep[df_grouped_dep['Week_Index'] == current_week_idx]

if not current_week_data.empty:
    # 1. How far into the week are we? (e.g., Today is Monday = 0)
    cutoff_day_idx = current_week_data['DayOfWeek'].max()
    
    # 2. What is the latest hour on that specific day?
    cutoff_hour = current_week_data.loc[
        current_week_data['DayOfWeek'] == cutoff_day_idx, 'Hour'
    ].max()
    
    print(f" -> Cutoff Point: Day {cutoff_day_idx} (Mon=0) at {cutoff_hour}")
else:
    # Fallback if current week is empty
    cutoff_day_idx = 6 # Sunday
    cutoff_hour = "23:59 - 23:59"
    print(" -> No current data. defaulting to full week.")

# --- 3. CREATE THE 'MARK' COLUMN ---
print("Tagging rows with 'pacing_mark'...")

def get_pacing_mark(row):
    # Rule 1: Always include the current week (it is the reference)
    if row['Week_Index'] == current_week_idx:
        return 'Included'
    
    # Rule 2: For historical weeks, compare Day & Hour
    # A. If the day is EARLIER in the week (e.g. Current is Wed, Row is Mon) -> Keep
    if row['DayOfWeek'] < cutoff_day_idx:
        return 'Included'
    
    # B. If it's the SAME Day, check the Hour
    if row['DayOfWeek'] == cutoff_day_idx:
        if row['Hour'] <= cutoff_hour:
            return 'Included'
        else:
            return 'Excluded' # Same day, but later hour
            
    # C. If the day is LATER in the week (e.g. Current is Mon, Row is Tue) -> Exclude
    if row['DayOfWeek'] > cutoff_day_idx:
        return 'Excluded'

    return 'Excluded'

# Apply the logic
df_grouped_dep['pacing_mark'] = df_grouped_dep.apply(get_pacing_mark, axis=1)

# --- 4. GENERATE LABELS ---
print("Generating Week Labels...")
# Week start (Monday)
df_grouped_dep['Week_Start'] = df_grouped_dep['Date'] - pd.to_timedelta(df_grouped_dep['Date'].dt.dayofweek, unit='D')

# Week end (Sunday)
df_grouped_dep['Week_End'] = df_grouped_dep['Week_Start'] + pd.to_timedelta(6, unit='D')

# Use Week_End for label
df_grouped_dep['Week_Label'] = (
    df_grouped_dep['Week_End'].dt.day.astype(str) + " " +
    df_grouped_dep['Week_End'].dt.strftime('%b %y')
)

# --- 5. AGGREGATE (INCLUDE 'pacing_mark' IN GROUP BY) ---
print("Aggregating...")

final_group_cols = [
    "Week_Label", 
    "Week_Index", 
    "pacing_mark", # <--- CRITICAL: Keep this so you can filter in Looker
    "providerName", "channel_type", "type", "Country", "account_group", "group_re"
]
sum_cols = [
    "Count", "Count Success", "Count Error", "Count Timeout",
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above'
]

df_final_dep = df_grouped_dep.groupby(final_group_cols)[sum_cols].sum().reset_index()

# Sort DESC so Week 49 is top
df_final_dep = df_final_dep.sort_values(by=['Week_Index', 'pacing_mark'], ascending=[False, True])

# print(df_final[['Week_Label', 'pacing_mark', 'Count']].head())

# write_df_to_gsheet(df_final)



Identifying Cutoff from Current Week...
 -> Cutoff Point: Day 6 (Mon=0) at 23:00 - 23:59
Tagging rows with 'pacing_mark'...
Generating Week Labels...
Aggregating...


In [27]:
df_final_dep.loc[df_final_dep["Week_Label"].str.contains("26"), "Week_Index"] += 52

In [28]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,Count,Count Success,Count Error,Count Timeout,Total_Net_Amount,winsorized_total_time_seconds,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1306,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,979,27,300,1074492.00,77288.0,771,101,59,48
1307,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,850,16,356,434389.00,110474.0,374,210,168,98
1308,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,1206,44,391,599058.00,129771.0,606,303,221,76
1309,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG1,KZO,6795,5183,107,1505,2549155.39,600258.0,2480,1274,1019,410
1310,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG2,KZO,6413,4899,206,1308,1987161.60,523034.0,2598,1121,853,327
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1671,4 Jan 26,53,Included,xqpay-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,4881,3472,12,1397,1622905.00,239390.0,2923,311,150,88
1672,4 Jan 26,53,Included,xqpay-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,184,90,0,94,47265.00,12501.0,41,29,10,10
1673,4 Jan 26,53,Included,ydpay-mx,clabe,DEPOSIT,Mexico,KZG1,KZO,446,338,3,105,27961.67,45984.0,137,64,58,79
1674,4 Jan 26,53,Included,yzpay-th,bank-transfer,DEPOSIT,Thailand,BLG1,KZP,192,84,0,108,49513.07,38242.0,0,10,7,67


In [29]:
df_final_dep.columns = ['Week_Label', 'Week_Index', 'pacing_mark','providerName', 'channel_type', 'type', 'Country', 'account_group', "group_re",
        'DP Count', "Count Success", "Count Error", "Count Timeout", 'Total Amount DP',
       'Avg DP Time',     'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above']

# cond1 = df_final_dep["Week_Index"] == (df_final_dep["Week_Index"].max() - 1)
# cond2 = df_final_dep["pacing_mark"] == "Excluded"
# df_final_dep.loc[~(cond1 & cond2), "pacing_mark"] = None

df_final_dep["Week_Index"] = df_final_dep["Week_Index"] - df_final_dep["Week_Index"].max()

# df_final_dep = df_final_dep[df_final_dep["Week_Index"] != 0 ]
# df_final_dep["Week_Index"] = df_final_dep["Week_Index"] + 1
# df_final_dep = df_final_dep[df_final_dep["Week_Index"] >= -5]

In [30]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,Count Error,Count Timeout,Total Amount DP,Avg DP Time,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1306,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,979,27,300,1074492.00,77288.0,771,101,59,48
1307,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,850,16,356,434389.00,110474.0,374,210,168,98
1308,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,1206,44,391,599058.00,129771.0,606,303,221,76
1309,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG1,KZO,6795,5183,107,1505,2549155.39,600258.0,2480,1274,1019,410
1310,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG2,KZO,6413,4899,206,1308,1987161.60,523034.0,2598,1121,853,327
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1671,4 Jan 26,-7,Included,xqpay-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,4881,3472,12,1397,1622905.00,239390.0,2923,311,150,88
1672,4 Jan 26,-7,Included,xqpay-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,184,90,0,94,47265.00,12501.0,41,29,10,10
1673,4 Jan 26,-7,Included,ydpay-mx,clabe,DEPOSIT,Mexico,KZG1,KZO,446,338,3,105,27961.67,45984.0,137,64,58,79
1674,4 Jan 26,-7,Included,yzpay-th,bank-transfer,DEPOSIT,Thailand,BLG1,KZP,192,84,0,108,49513.07,38242.0,0,10,7,67


## WITHDRAWL DATA

In [31]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,Count Error,Count Timeout,Total Amount DP,Avg DP Time,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1306,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,979,27,300,1074492.00,77288.0,771,101,59,48
1307,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,850,16,356,434389.00,110474.0,374,210,168,98
1308,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,1206,44,391,599058.00,129771.0,606,303,221,76
1309,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG1,KZO,6795,5183,107,1505,2549155.39,600258.0,2480,1274,1019,410
1310,28 Dec 25,-8,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG2,KZO,6413,4899,206,1308,1987161.60,523034.0,2598,1121,853,327
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1671,4 Jan 26,-7,Included,xqpay-bd,nagad,DEPOSIT,Bangladesh,KZG1,KZO,4881,3472,12,1397,1622905.00,239390.0,2923,311,150,88
1672,4 Jan 26,-7,Included,xqpay-bd,rocket,DEPOSIT,Bangladesh,KZG1,KZO,184,90,0,94,47265.00,12501.0,41,29,10,10
1673,4 Jan 26,-7,Included,ydpay-mx,clabe,DEPOSIT,Mexico,KZG1,KZO,446,338,3,105,27961.67,45984.0,137,64,58,79
1674,4 Jan 26,-7,Included,yzpay-th,bank-transfer,DEPOSIT,Thailand,BLG1,KZP,192,84,0,108,49513.07,38242.0,0,10,7,67


In [32]:
# import pandas as pd
# import gspread
# from gspread_dataframe import set_with_dataframe
# from google.oauth2.service_account import Credentials

# # --- 1. CONFIGURATION ---
# CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
# SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
# WORKSHEET_NAME = 'Withdrawal Data'

# def write_df_to_gsheet(dataframe):
#     # --- 2. AUTHENTICATION ---
#     # Define the scope (permissions) required
#     scopes = [
#         'https://www.googleapis.com/auth/spreadsheets',
#         'https://www.googleapis.com/auth/drive'
#     ]
    
#     # Authenticate using the JSON key file
#     credentials = Credentials.from_service_account_file(
#         CREDENTIALS_FILE, 
#         scopes=scopes
#     )
#     gc = gspread.authorize(credentials)

#     # --- 3. CONNECT TO SHEET ---
#     print(f"Opening sheet ID: {SHEET_ID}...")
#     sh = gc.open_by_key(SHEET_ID)
    
#     try:
#         worksheet = sh.worksheet(WORKSHEET_NAME)
#     except gspread.WorksheetNotFound:
#         print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
#         worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

#     # --- 4. WRITE DATA ---
#     print("Clearing old data...")
#     worksheet.clear()  # Removes old data so rows don't overlap
    
#     print(f"Writing {len(dataframe)} rows to Google Sheets...")
#     # 'set_with_dataframe' handles headers and NaN values automatically
#     set_with_dataframe(worksheet, dataframe)
    
#     print("Done!")

# # --- Usage ---
# if __name__ == "__main__":
#     # Assuming 'df' exists from your previous code
#     # If not, uncomment the line below to test with dummy data:
#     # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
#     write_df_to_gsheet(df_withdraw)

In [33]:
df_withdraw[["Date", "providerName", "channel_type", "type" , "Country", "account_group"]]

Unnamed: 0,Date,providerName,channel_type,type,Country,account_group
1,2026-02-06,vcpay-ph-native,gcash,WITHDRAW,Philippines,PHKZG1
5,2026-02-14,sgpay,gcash,WITHDRAW,Philippines,PHKZG1
6,2025-12-12,xqpay-bd,bkash,WITHDRAW,Bangladesh,KZG1
12,2026-02-07,toppay-mx,clabe,WITHDRAW,Mexico,KZG1
13,2025-12-22,xqpay-bd,bkash,WITHDRAW,Bangladesh,KZG1
...,...,...,...,...,...,...
1181052,2025-12-04,wepay,bank-transfer,WITHDRAW,Thailand,96G1
1181053,2025-12-24,wepay,bank-transfer,WITHDRAW,Thailand,96G1
1181054,2026-02-05,wepay,bank-transfer,WITHDRAW,Thailand,THKZG2
1181055,2025-12-14,wepay,bank-transfer,WITHDRAW,Thailand,THKZG2


In [34]:
df_withdraw = df_withdraw[df_withdraw["Date"] >= "2025-11-01"]

In [35]:
df_withdraw[["Count", "Total_Net_Amount", "winsorized_total_time_seconds", "Count_03m00s_Below", "Count_03m31s_to_05m00s", "Count_05m00s_to_10m00s", 'Count_10m00s_Above']]

Unnamed: 0,Count,Total_Net_Amount,winsorized_total_time_seconds,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
1,184,53781.0,7305.0,184,0,0,0
5,248,107741.0,13534.0,248,0,0,0
6,24,12881.0,994.0,24,0,0,0
12,53,16957.0,8747.0,40,12,0,1
13,10,28120.0,544.0,9,1,0,0
...,...,...,...,...,...,...,...
1181052,1,2000.0,408.0,0,0,1,0
1181053,1,700.0,697.0,0,0,0,1
1181054,4,5444.0,408.0,4,0,0,0
1181055,3,4480.0,156.0,3,0,0,0


In [36]:
# 1. Define your columns
group_cols = ["Date", "providerName", "channel_type", "type", "Country", "account_group", "group_re", "Hour"]

sum_cols = [
    "Count", 
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    "Count_03m00s_Below", 
    "Count_03m31s_to_05m00s", 
    "Count_05m00s_to_10m00s", 
    "Count_10m00s_Above"
]

# 2. Perform Group By and Sum
df_grouped = df_withdraw.groupby(group_cols)[sum_cols].sum().reset_index()

# 3. (Optional) Inspect the result
print(df_grouped.head())

        Date providerName channel_type      type Country account_group  \
0 2025-11-30    toppay-mx        clabe  WITHDRAW  Mexico          KZG1   
1 2025-11-30    toppay-mx        clabe  WITHDRAW  Mexico          KZG1   
2 2025-11-30    toppay-mx        clabe  WITHDRAW  Mexico          KZG1   
3 2025-11-30    toppay-mx        clabe  WITHDRAW  Mexico          KZG1   
4 2025-11-30    toppay-mx        clabe  WITHDRAW  Mexico          KZG1   

  group_re           Hour  Count  Total_Net_Amount  \
0      KZO  17:00 - 17:59      1             208.0   
1      KZO  18:00 - 18:59      4             352.0   
2      KZO  19:00 - 19:59      9            4331.0   
3      KZO  20:00 - 20:59      3             200.0   
4      KZO  21:00 - 21:59      5             751.0   

   winsorized_total_time_seconds  Count_03m00s_Below  Count_03m31s_to_05m00s  \
0                          136.0                   1                       0   
1                          630.0                   4                  

In [37]:
import pandas as pd
from datetime import timedelta

# --- 1. PREPARE DATA & INDICES ---
# Assumes df_grouped has [Date, Hour, providerName, etc., Count, NetAmount...]

print("Calculating Week Indices...")
df_grouped['Date'] = pd.to_datetime(df_grouped['Date'])
df_grouped['Week_Index'] = df_grouped['Date'].dt.isocalendar().week.astype(int)

# Helper: Day of Week (0=Mon, 6=Sun)
df_grouped['DayOfWeek'] = df_grouped['Date'].dt.dayofweek

# --- 2. DETERMINE THE CUTOFF (Based on Current Week) ---
print("Identifying Cutoff from Current Week...")

# Identify the Current Week (Highest Index)
current_week_idx = df_grouped['Week_Index'].max()
current_week_data = df_grouped[df_grouped['Week_Index'] == current_week_idx]

if not current_week_data.empty:
    # 1. How far into the week are we? (e.g., Today is Monday = 0)
    cutoff_day_idx = current_week_data['DayOfWeek'].max()
    
    # 2. What is the latest hour on that specific day?
    cutoff_hour = current_week_data.loc[
        current_week_data['DayOfWeek'] == cutoff_day_idx, 'Hour'
    ].max()
    
    print(f" -> Cutoff Point: Day {cutoff_day_idx} (Mon=0) at {cutoff_hour}")
else:
    # Fallback if current week is empty
    cutoff_day_idx = 6 # Sunday
    cutoff_hour = "23:59 - 23:59"
    print(" -> No current data. defaulting to full week.")

# --- 3. CREATE THE 'MARK' COLUMN ---
print("Tagging rows with 'pacing_mark'...")

def get_pacing_mark(row):
    # Rule 1: Always include the current week (it is the reference)
    if row['Week_Index'] == current_week_idx:
        return 'Included'
    
    # Rule 2: For historical weeks, compare Day & Hour
    # A. If the day is EARLIER in the week (e.g. Current is Wed, Row is Mon) -> Keep
    if row['DayOfWeek'] < cutoff_day_idx:
        return 'Included'
    
    # B. If it's the SAME Day, check the Hour
    if row['DayOfWeek'] == cutoff_day_idx:
        if row['Hour'] <= cutoff_hour:
            return 'Included'
        else:
            return 'Excluded' # Same day, but later hour
            
    # C. If the day is LATER in the week (e.g. Current is Mon, Row is Tue) -> Exclude
    if row['DayOfWeek'] > cutoff_day_idx:
        return 'Excluded'

    return 'Excluded'

# Apply the logic
df_grouped['pacing_mark'] = df_grouped.apply(get_pacing_mark, axis=1)

# --- 4. GENERATE LABELS ---
print("Generating Week Labels...")
# Week start (Monday)
df_grouped['Week_Start'] = df_grouped['Date'] - pd.to_timedelta(df_grouped['Date'].dt.dayofweek, unit='D')

# Week end (Sunday)
df_grouped['Week_End'] = df_grouped['Week_Start'] + pd.to_timedelta(6, unit='D')

# Use Week_End for label
df_grouped['Week_Label'] = (
    df_grouped['Week_End'].dt.day.astype(str) + " " +
    df_grouped['Week_End'].dt.strftime('%b %y')
)


# --- 5. AGGREGATE (INCLUDE 'pacing_mark' IN GROUP BY) ---
print("Aggregating...")

final_group_cols = [
    "Week_Label", 
    "Week_Index", 
    "pacing_mark", # <--- CRITICAL: Keep this so you can filter in Looker
    "providerName", "channel_type", "type", "Country", "account_group", "group_re"
]

sum_cols = [
    "Count", "Total_Net_Amount", "winsorized_total_time_seconds", 
    "Count_03m00s_Below", "Count_03m31s_to_05m00s", 
    "Count_05m00s_to_10m00s", "Count_10m00s_Above"
]

df_final = df_grouped.groupby(final_group_cols)[sum_cols].sum().reset_index()

# Sort DESC so Week 49 is top
df_final = df_final.sort_values(by=['Week_Index', 'pacing_mark'], ascending=[False, True])

print(df_final[['Week_Label', 'pacing_mark', 'Count']].head())

# write_df_to_gsheet(df_final)

Calculating Week Indices...
Identifying Cutoff from Current Week...
 -> Cutoff Point: Day 6 (Mon=0) at 23:00 - 23:59
Tagging rows with 'pacing_mark'...
Generating Week Labels...
Aggregating...
    Week_Label pacing_mark  Count
896  28 Dec 25    Included    418
897  28 Dec 25    Included     64
898  28 Dec 25    Included     31
899  28 Dec 25    Included    347
900  28 Dec 25    Included    130


In [38]:
df_final.loc[df_final["Week_Label"].str.contains("26"), "Week_Index"] += 52

In [39]:
df_final.columns = ['Week_Label', 'Week_Index', 'pacing_mark','providerName', 'channel_type', 'type',
       'Country', 'account_group', 'group_re','WD Count', 'Total Amount WD',
       'Avg WD Time', 'Count_03m00s_Below',
       'Count_03m31s_to_05m00s', 'Count_05m00s_to_10m00s',
       'Count_10m00s_Above']
       
# cond1 = df_final["Week_Index"] == (df_final["Week_Index"].max() - 1)
# cond2 = df_final["pacing_mark"] == "Excluded"
# df_final.loc[~(cond1 & cond2), "pacing_mark"] = None

df_final["Week_Index"] = df_final["Week_Index"] - df_final["Week_Index"].max()

# df_final = df_final[df_final["Week_Index"] != 0 ]
# df_final["Week_Index"] = df_final["Week_Index"] + 1
# df_final = df_final[df_final["Week_Index"] >= -5]

In [40]:
df_final

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,WD Count,Total Amount WD,Avg WD Time,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
896,28 Dec 25,-8,Included,aipay,bank-transfer,WITHDRAW,Thailand,96G1,KZP,418,990764.0,68219.0,334,3,47,34
897,28 Dec 25,-8,Included,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZP,64,567532.0,35485.0,40,0,5,19
898,28 Dec 25,-8,Included,aipay,bank-transfer,WITHDRAW,Thailand,Others,KZP,31,114278.0,11765.0,18,0,5,8
899,28 Dec 25,-8,Included,aipay,bank-transfer,WITHDRAW,Thailand,THKZG1,KZO,347,2512641.0,165229.0,216,14,32,85
900,28 Dec 25,-8,Included,aipay,bank-transfer,WITHDRAW,Thailand,THKZG2,KZO,130,890719.0,53442.0,72,1,15,42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1103,4 Jan 26,-7,Included,xpay-pk,easypaisa,WITHDRAW,Pakistan,KZG1,KZO,325,1071114.0,22511.0,311,5,5,4
1104,4 Jan 26,-7,Included,xqpay-bd,bkash,WITHDRAW,Bangladesh,KZG1,KZO,1185,1164546.0,70677.0,1166,5,8,6
1105,4 Jan 26,-7,Included,xqpay-bd,nagad,WITHDRAW,Bangladesh,KZG1,KZO,952,864338.0,48122.0,944,3,2,3
1106,4 Jan 26,-7,Included,xqpay-bd,rocket,WITHDRAW,Bangladesh,KZG1,KZO,12,6585.0,2600.0,6,2,3,1


In [41]:
df_total_final = pd.concat([df_final_dep, df_final], axis=0)

In [42]:
df_total_prev = df_total_final[df_total_final["Week_Index"] < 0]
df_total_prev["Week_Index"] = df_total_prev["Week_Index"] + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_total_prev["Week_Index"] = df_total_prev["Week_Index"] + 1


In [43]:
df_total_prev.columns = ['Week_Label', 'Week_Index', 'pacing_mark', 'providerName',
       'channel_type', 'type', 'Country', 'account_group', 'group_re','DP Count Prev', 'Count Success Prev',
       'Count Error Prev', 'Count Timeout Prev',
       'Total Amount DP Prev', 'Avg DP Time Prev', 'Count_01m30s_Below Prev',
       'Count_01m31s_to_02m00s Prev', 'Count_02m01s_to_03m00s Prev',
       'Count_03m00s_Above Prev', 'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
       'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
       'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev']

In [44]:
df_total_prev

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count Prev,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
1306,28 Dec 25,-7,Included,aipay,qr-code,DEPOSIT,Thailand,96G1,KZP,1306,...,101,59,48,,,,,,,
1307,28 Dec 25,-7,Included,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,1222,...,210,168,98,,,,,,,
1308,28 Dec 25,-7,Included,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,1641,...,303,221,76,,,,,,,
1309,28 Dec 25,-7,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG1,KZO,6795,...,1274,1019,410,,,,,,,
1310,28 Dec 25,-7,Included,aipay,qr-code,DEPOSIT,Thailand,THKZG2,KZO,6413,...,1121,853,327,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1103,4 Jan 26,-6,Included,xpay-pk,easypaisa,WITHDRAW,Pakistan,KZG1,KZO,,...,,,,325,1071114.0,22511.0,311,5,5,4
1104,4 Jan 26,-6,Included,xqpay-bd,bkash,WITHDRAW,Bangladesh,KZG1,KZO,,...,,,,1185,1164546.0,70677.0,1166,5,8,6
1105,4 Jan 26,-6,Included,xqpay-bd,nagad,WITHDRAW,Bangladesh,KZG1,KZO,,...,,,,952,864338.0,48122.0,944,3,2,3
1106,4 Jan 26,-6,Included,xqpay-bd,rocket,WITHDRAW,Bangladesh,KZG1,KZO,,...,,,,12,6585.0,2600.0,6,2,3,1


In [45]:
index_cols = [
    'Week_Label', 'Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', 'group_re'
]

metric_cols = [
    'DP Count',  "Count Success", "Count Error", "Count Timeout", 'Total Amount DP', 'Avg DP Time',
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
    'Count_02m01s_to_03m00s', 'Count_03m00s_Above',
    'WD Count', 'Total Amount WD', 'Avg WD Time',
    'Count_03m00s_Below', 'Count_03m31s_to_05m00s',
    'Count_05m00s_to_10m00s', 'Count_10m00s_Above'
]

df_grouped_final = (
    df_total_final
    .groupby(index_cols)[metric_cols]
    .sum()
    .reset_index()
)

In [46]:
df_grouped_final = df_grouped_final.sort_values(by=['Week_Index', 'type'], ascending=[False, True])

In [47]:
index_cols = [
    'Week_Label', 'Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', "group_re"
]

metric_cols_prev = [
    'DP Count Prev',  "Count Success Prev", "Count Error Prev", "Count Timeout Prev", 'Total Amount DP Prev', 'Avg DP Time Prev',
    'Count_01m30s_Below Prev', 'Count_01m31s_to_02m00s Prev',
    'Count_02m01s_to_03m00s Prev', 'Count_03m00s_Above Prev',
    'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
    'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
    'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev'
]

df_grouped_prev = (
    df_total_prev
    .groupby(index_cols)[metric_cols_prev]
    .sum()
    .reset_index()
)

In [48]:
df_grouped_final

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,...,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above,WD Count,Total Amount WD,Avg WD Time,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
1716,22 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,KZG1,KZP,438,346,...,103,67,34,0,0.0,0.0,0,0,0,0
1717,22 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,Others,KZP,656,494,...,143,86,52,0,0.0,0.0,0,0,0,0
1718,22 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,THKZG1,KZO,2403,2052,...,565,352,173,0,0.0,0.0,0,0,0,0
1719,22 Feb 26,0,aipay,qr-code,DEPOSIT,Thailand,THKZG2,KZO,2278,1852,...,477,309,207,0,0.0,0.0,0,0,0,0
1720,22 Feb 26,0,akepay-mx,clabe,DEPOSIT,Mexico,KZG1,KZO,1,0,...,0,0,0,0,0.0,0.0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2502,30 Nov 25,-12,toppay-mx,clabe,WITHDRAW,Mexico,KZG1,KZO,0,0,...,0,0,0,35,9415.0,5684.0,29,6,0,0
2505,30 Nov 25,-12,zenpay,pix,WITHDRAW,Brazil,KZG1,KZO,0,0,...,0,0,0,7,307.0,448.0,7,0,0,0
1902,23 Nov 25,-13,hspay,bank-transfer-native,DEPOSIT,Thailand,THKZG2,KZO,1,1,...,0,0,1,0,0.0,0.0,0,0,0,0
1165,16 Nov 25,-14,hspay,bank-transfer-native,DEPOSIT,Thailand,WDB1,KZP,1,1,...,0,0,1,0,0.0,0.0,0,0,0,0


In [49]:
df_grouped_prev

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count Prev,Count Success Prev,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
0,1 Feb 26,-2,aipay,bank-transfer,WITHDRAW,Thailand,96G1,KZP,0,0,...,0,0,0,34,544192.0,3896.0,31,1,1,1
1,1 Feb 26,-2,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZP,0,0,...,0,0,0,121,570803.0,29743.0,96,12,4,9
2,1 Feb 26,-2,aipay,bank-transfer,WITHDRAW,Thailand,Others,KZP,0,0,...,0,0,0,118,667336.0,53750.0,94,9,4,11
3,1 Feb 26,-2,aipay,bank-transfer,WITHDRAW,Thailand,THKZG1,KZO,0,0,...,0,0,0,840,4103224.0,212921.0,665,71,45,59
4,1 Feb 26,-2,aipay,bank-transfer,WITHDRAW,Thailand,THKZG2,KZO,0,0,...,0,0,0,481,2572528.0,179159.0,371,42,27,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3180,8 Feb 26,-1,ydpay-mx,clabe,DEPOSIT,Mexico,MXNKZG1,KZO,812,635,...,118,107,91,0,0.0,0.0,0,0,0,0
3181,8 Feb 26,-1,ydpay-mx,clabe,WITHDRAW,Mexico,KZG1,KZO,0,0,...,0,0,0,2010,273935.0,311712.0,1859,133,3,15
3182,8 Feb 26,-1,ydpay-mx,clabe,WITHDRAW,Mexico,MXNKZG1,KZO,0,0,...,0,0,0,334,25175.0,50461.0,312,18,2,2
3183,8 Feb 26,-1,zenpay,pix,DEPOSIT,Brazil,KZG1,KZO,1844,1490,...,72,54,27,0,0.0,0.0,0,0,0,0


In [50]:
df_grouped_concat = df_grouped_final.merge(df_grouped_prev[['Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', 'group_re','DP Count Prev',  "Count Success Prev", "Count Error Prev", "Count Timeout Prev",'Total Amount DP Prev', 'Avg DP Time Prev',
    'Count_01m30s_Below Prev', 'Count_01m31s_to_02m00s Prev',
    'Count_02m01s_to_03m00s Prev', 'Count_03m00s_Above Prev',
    'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
    'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
    'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev']], how = 'outer', on = ['Week_Index', 'providerName',
       'channel_type', 'type', 'Country', 'account_group', 'group_re'])

In [51]:
list_data = df_grouped_concat[df_grouped_concat["Week_Label"].notna()][["Week_Label", "Week_Index"]].drop_duplicates()

In [52]:
df_grouped_concat = df_grouped_concat[[i for i in df_grouped_concat.columns if "Week_Label" not in i]].merge(list_data,  how = "left")

In [53]:
df_grouped_concat["Week_Label"] = df_grouped_concat["Week_Label"].astype(str)

In [54]:
df_grouped_concat.groupby(['Week_Label', 'Week_Index']).agg({"WD Count" : "sum"
                                                             , "WD Count Prev": "sum"}).reset_index().sort_values(by=['Week_Index'], ascending=[False])

Unnamed: 0,Week_Label,Week_Index,WD Count,WD Count Prev
7,22 Feb 26,0,46000,633763
3,15 Feb 26,-1,633763,614916
14,8 Feb 26,-2,614916,593058
0,1 Feb 26,-3,593058,528664
9,25 Jan 26,-4,528664,525321
5,18 Jan 26,-5,525321,485822
1,11 Jan 26,-6,485822,504188
12,4 Jan 26,-7,504188,495814
10,28 Dec 25,-8,495814,432240
6,21 Dec 25,-9,432240,400276


In [55]:
df_grouped_concat = df_grouped_concat[["Week_Label"] + [i for i in df_grouped_concat.columns if i != "Week_Label"]]

In [56]:
df_grouped_concat = df_grouped_concat[df_grouped_concat["Week_Index"] >= -6]

In [57]:
df_grouped_concat["group_re"] = df_grouped_concat["group_re"].str.replace("KZG", "KZO")

In [63]:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
from google.oauth2.service_account import Credentials

# --- 1. CONFIGURATION ---
CREDENTIALS_FILE = 'plexiform-armor-481509-h4-99dffa062794.json'
SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
WORKSHEET_NAME = 'WoW Data'

def write_df_to_gsheet(dataframe):
    # --- 2. AUTHENTICATION ---
    # Define the scope (permissions) required
    scopes = [
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive'
    ]
    
    # Authenticate using the JSON key file
    credentials = Credentials.from_service_account_file(
        CREDENTIALS_FILE, 
        scopes=scopes
    )
    gc = gspread.authorize(credentials)

    # --- 3. CONNECT TO SHEET ---
    print(f"Opening sheet ID: {SHEET_ID}...")
    sh = gc.open_by_key(SHEET_ID)
    
    try:
        worksheet = sh.worksheet(WORKSHEET_NAME)
    except gspread.WorksheetNotFound:
        print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
        worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

    # --- 4. WRITE DATA ---
    print("Clearing old data...")
    worksheet.clear()  # Removes old data so rows don't overlap
    
    print(f"Writing {len(dataframe)} rows to Google Sheets...")
    # 'set_with_dataframe' handles headers and NaN values automatically
    set_with_dataframe(worksheet, dataframe)
    
    print("Done!")

# --- Usage ---
if __name__ == "__main__":
    # Assuming 'df' exists from your previous code
    # If not, uncomment the line below to test with dummy data:
    # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
    write_df_to_gsheet(df_grouped_concat.sort_values(by=['Week_Index'], ascending=[False]))

Opening sheet ID: 1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4...
Clearing old data...
Writing 2328 rows to Google Sheets...
Done!


In [64]:
df_deposit[df_deposit["Country"] == "Bangladesh"]

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re,Count Success,Count Error,Count Timeout
0,2026-01-01,wingpay-bd,wingpay-bd,bkash,DEPOSIT,BDT,Bangladesh,timeout,12:00 - 12:59,7,...,0,0,wingpay-bd,bkash,ADDA7,KZG1,KZO,,,7
7,2026-02-04,tgpay-bd,tgpay-bd/rocket,rocket,DEPOSIT,BDT,Bangladesh,completed,22:00 - 22:59,5,...,0,1,tgpay-bd,rocket,ADDA7,KZG1,KZO,5,,
9,2025-12-27,tgpay-bd,tgpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,completed,13:00 - 13:59,44,...,0,1,tgpay-bd,bkash,ADDA7,KZG1,KZO,44,,
11,2026-02-12,xpay-bd,xpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,timeout,13:00 - 13:59,86,...,0,0,xpay-bd,bkash,ADDA7,KZG1,KZO,,,86
15,2025-12-08,wingpay-bd,wingpay-bd/bkash-qr,bkash-qr,DEPOSIT,BDT,Bangladesh,completed,03:00 - 03:59,28,...,0,0,wingpay-bd,bkash,ADDA7,KZG1,KZO,28,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979125,2026-02-08,xqpay-bd,xqpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,timeout,18:00 - 18:59,1,...,0,0,xqpay-bd,bkash,ADDA7,KZG1,KZO,,,1
979135,2026-01-10,worldpay,worldpay/nagad,nagad,DEPOSIT,BDT,Bangladesh,error,01:00 - 01:59,1,...,0,0,worldpay,nagad,ADDA7,KZG1,KZO,,1,
979139,2026-01-28,gopay,gopay/nagad,nagad,DEPOSIT,BDT,Bangladesh,timeout,03:00 - 03:59,1,...,0,0,gopay,nagad,BDPOP,KZG1,KZO,,,1
979146,2025-12-22,xqpay-bd,xqpay-bd/bkash,bkash,DEPOSIT,BDT,Bangladesh,timeout,06:00 - 06:59,1,...,0,0,xqpay-bd,bkash,BDPOP,KZG1,KZO,,,1


In [65]:
df_grouped_concat[(df_grouped_concat["type"] == "DEPOSIT") & 
                  (df_grouped_concat["Country"] == "Bangladesh") &
                  (df_grouped_concat["Week_Label"] == "4 Jan 26")]

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev


In [66]:
df_grouped_concat

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
1499,11 Jan 26,-6,aipay,bank-transfer,WITHDRAW,Thailand,96G1,KZP,0,0,...,0,0,0,171,1532048.0,35661.0,150,1,5,15
1500,11 Jan 26,-6,aipay,bank-transfer,WITHDRAW,Thailand,KZG1,KZP,0,0,...,0,0,0,129,768158.0,48198.0,86,7,6,30
1501,11 Jan 26,-6,aipay,bank-transfer,WITHDRAW,Thailand,Others,KZP,0,0,...,0,0,0,362,2388992.0,106297.0,298,11,14,39
1502,11 Jan 26,-6,aipay,bank-transfer,WITHDRAW,Thailand,THKZG1,KZO,0,0,...,0,0,0,870,6742950.0,318001.0,577,57,56,180
1503,11 Jan 26,-6,aipay,bank-transfer,WITHDRAW,Thailand,THKZG2,KZO,0,0,...,0,0,0,1318,8486674.0,453562.0,1059,45,55,159
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3822,22 Feb 26,0,ydpay-mx,clabe,DEPOSIT,Mexico,MXNKZG1,KZO,,,...,3,1,2,0,0.0,0.0,0,0,0,0
3823,22 Feb 26,0,ydpay-mx,clabe,WITHDRAW,Mexico,KZG1,KZO,,,...,0,0,0,76,19936.0,8906.0,75,0,1,0
3824,22 Feb 26,0,ydpay-mx,clabe,WITHDRAW,Mexico,MXNKZG1,KZO,,,...,0,0,0,23,4530.0,2784.0,23,0,0,0
3825,22 Feb 26,0,zenpay,pix,DEPOSIT,Brazil,KZG1,KZO,,,...,64,33,29,0,0.0,0.0,0,0,0,0
