In [13]:
import pandas as pd
import gspread
from gspread_dataframe import get_as_dataframe # <--- Import this
from google.oauth2.service_account import Credentials
from gspread_dataframe import set_with_dataframe
from google.cloud import bigquery

In [14]:
# --- 1. CONFIGURATION ---
CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
SHEET_ID = '1hHV98ZAqng6ogy48iKDTP_tMVP8siwzgB8ybLSq40kU'
WORKSHEET_NAME = 'Transformed'

## READ DATA

In [15]:
# 1. Read the Parquet file
df = pd.read_parquet("daily_funding.parquet")
print(f"Successfully read parquet file with {len(df)} rows.")

# 2. String Normalization
df["brand"] = df["brand"].astype(str).str.upper().str.strip()

# 3. Ensure Dates are standard Datetime objects (Fixes filtering issues)
# Even if parquet stores them correctly, this ensures they are datetime64[ns]
# which allows for direct comparison like: df['Date'] >= '2025-11-01'
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])

Successfully read parquet file with 1208316 rows.


In [16]:
client = bigquery.Client()

# ... [Keep your existing SQL query exactly as is] ...
sql_query = """
SELECT
  Brand AS brand,
  Sub_group AS sub_group,
  CASE
    WHEN Whitelabel = 'KZ' THEN 'KZO'
    ELSE 'KZP'
  END AS whitelabel
FROM `kz-dp-prod.MAPPING.brand_whitelabel_country_folderid_mapping_tbl`;
"""

dim_brand_v2 = client.query(sql_query).to_dataframe()



In [17]:
dim_brand_v2["brand"] = dim_brand_v2["brand"].str.strip().str.upper()

In [18]:
# dim_brand_v2 = pd.read_csv("mapping_brand_v2.csv") 
# dim_brand_v2 = dim_brand_v2[["brand","sub_group", "whitelabel"]]

In [19]:
dim_brand_v2.columns = ["brand", "account_group", "group_re"]

In [20]:
# dim_brand_v2.loc[dim_brand_v2["group_re"] == 'KZ', 'group_re'] = 'KZO'
# dim_brand_v2.loc[dim_brand_v2["group_re"] != 'KZO', 'group_re'] = 'KZP'
# dim_brand_v2.loc[(dim_brand_v2["group_re"] != 'KZP') & (dim_brand_v2["group_re"] != 'KZo'), 'group_re'] = 'KZO'

In [21]:
dim_brand_v2["brand"] = dim_brand_v2["brand"].str.upper().str.strip()

In [22]:
df = df[[i for i in df.columns if i not in ["group_re", "account_group"]]].merge(dim_brand_v2, on = "brand", how = "left")

In [23]:
df_deposit = df[df['type'] == 'DEPOSIT']

In [24]:
df_deposit

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re
0,2026-01-05,toppay-mx,toppay-mx/bank-transfer,bank-transfer,DEPOSIT,MXN,MX,completed,04:00 - 04:59,42,...,4,38,3,1,0,toppay-mx,bank,MXWOW,KZG1,KZO
1,2025-11-25,wingpay-bd,wingpay-bd/bkash-qr,bkash-qr,DEPOSIT,BDT,BD,completed,14:00 - 14:59,7,...,0,7,0,0,0,wingpay-bd,bkash,ADDA7,KZG1,KZO
2,2025-12-11,mmpay,mmpay/gcash-direct,gcash-direct,DEPOSIT,PHP,PH,completed,09:00 - 09:59,121,...,13,108,9,4,0,mmpay,gcash,WINMAYA,96G1,KZP
3,2026-01-13,wingpay-bd,wingpay-bd/bkash-qr,bkash-qr,DEPOSIT,BDT,BD,completed,23:00 - 23:59,7,...,1,6,1,0,0,wingpay-bd,bkash,BDPOP,KZG1,KZO
4,2025-12-30,goalpay,goalpay/paymaya,paymaya,DEPOSIT,PHP,PH,timeout,04:00 - 04:59,3,...,0,0,0,0,0,goalpay,paymaya,EZWIN,KZ,KZO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1284246,2026-01-26,etpay,etpay/qr-code,qr-code,DEPOSIT,THB,TH,timeout,13:00 - 13:59,1,...,0,0,0,0,0,etpay,qr,MC36,96G1,KZP
1284247,2026-01-26,dppay,dppay/qr-code,qr-code,DEPOSIT,THB,TH,timeout,13:00 - 13:59,1,...,0,0,0,0,0,dppay,qr,SOS69,BLG1,KZP
1284248,2026-01-26,hspay,hspay/bank-transfer-native,bank-transfer-native,DEPOSIT,THB,TH,completed,14:00 - 14:59,1,...,0,1,0,0,0,hspay,bank,YES69,96G1,KZP
1284249,2026-01-26,etpay,etpay/qr-code,qr-code,DEPOSIT,THB,TH,completed,14:00 - 14:59,1,...,0,1,0,0,0,etpay,qr,WE67,96G1,KZP


In [25]:
df_withdraw = df[df['type'] == 'WITHDRAW']

## DEPOSIT DATA

In [26]:
df_deposit = df_deposit[df_deposit["Date"] >= '2025-11-01']

In [27]:
df_deposit

Unnamed: 0,Date,providerKey,method,channel_type,type,reqCurrency,Country,status,Hour,Count,...,Count_03m00s_Above,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above,providerName,channel_main,brand,account_group,group_re
0,2026-01-05,toppay-mx,toppay-mx/bank-transfer,bank-transfer,DEPOSIT,MXN,MX,completed,04:00 - 04:59,42,...,4,38,3,1,0,toppay-mx,bank,MXWOW,KZG1,KZO
1,2025-11-25,wingpay-bd,wingpay-bd/bkash-qr,bkash-qr,DEPOSIT,BDT,BD,completed,14:00 - 14:59,7,...,0,7,0,0,0,wingpay-bd,bkash,ADDA7,KZG1,KZO
2,2025-12-11,mmpay,mmpay/gcash-direct,gcash-direct,DEPOSIT,PHP,PH,completed,09:00 - 09:59,121,...,13,108,9,4,0,mmpay,gcash,WINMAYA,96G1,KZP
3,2026-01-13,wingpay-bd,wingpay-bd/bkash-qr,bkash-qr,DEPOSIT,BDT,BD,completed,23:00 - 23:59,7,...,1,6,1,0,0,wingpay-bd,bkash,BDPOP,KZG1,KZO
4,2025-12-30,goalpay,goalpay/paymaya,paymaya,DEPOSIT,PHP,PH,timeout,04:00 - 04:59,3,...,0,0,0,0,0,goalpay,paymaya,EZWIN,KZ,KZO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1284246,2026-01-26,etpay,etpay/qr-code,qr-code,DEPOSIT,THB,TH,timeout,13:00 - 13:59,1,...,0,0,0,0,0,etpay,qr,MC36,96G1,KZP
1284247,2026-01-26,dppay,dppay/qr-code,qr-code,DEPOSIT,THB,TH,timeout,13:00 - 13:59,1,...,0,0,0,0,0,dppay,qr,SOS69,BLG1,KZP
1284248,2026-01-26,hspay,hspay/bank-transfer-native,bank-transfer-native,DEPOSIT,THB,TH,completed,14:00 - 14:59,1,...,0,1,0,0,0,hspay,bank,YES69,96G1,KZP
1284249,2026-01-26,etpay,etpay/qr-code,qr-code,DEPOSIT,THB,TH,completed,14:00 - 14:59,1,...,0,1,0,0,0,etpay,qr,WE67,96G1,KZP


In [28]:
# import pandas as pd
# import gspread
# from gspread_dataframe import set_with_dataframe
# from google.oauth2.service_account import Credentials

# # --- 1. CONFIGURATION ---
# CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
# SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
# WORKSHEET_NAME = 'Deposit Data'

# def write_df_to_gsheet(dataframe):
#     # --- 2. AUTHENTICATION ---
#     # Define the scope (permissions) required
#     scopes = [
#         'https://www.googleapis.com/auth/spreadsheets',
#         'https://www.googleapis.com/auth/drive'
#     ]
    
#     # Authenticate using the JSON key file
#     credentials = Credentials.from_service_account_file(
#         CREDENTIALS_FILE, 
#         scopes=scopes
#     )
#     gc = gspread.authorize(credentials)

#     # --- 3. CONNECT TO SHEET ---
#     print(f"Opening sheet ID: {SHEET_ID}...")
#     sh = gc.open_by_key(SHEET_ID)
    
#     try:
#         worksheet = sh.worksheet(WORKSHEET_NAME)
#     except gspread.WorksheetNotFound:
#         print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
#         worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

#     # --- 4. WRITE DATA ---
#     print("Clearing old data...")
#     worksheet.clear()  # Removes old data so rows don't overlap
    
#     print(f"Writing {len(dataframe)} rows to Google Sheets...")
#     # 'set_with_dataframe' handles headers and NaN values automatically
#     set_with_dataframe(worksheet, dataframe)
    
#     print("Done!")

# # --- Usage ---
# if __name__ == "__main__":
#     # Assuming 'df' exists from your previous code
#     # If not, uncomment the line below to test with dummy data:
#     # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
#     write_df_to_gsheet(df_deposit)

In [29]:
df_deposit.loc[df_deposit["status"] == "completed", "Count Success"] = df_deposit["Count"]

df_deposit.loc[df_deposit["status"].str.contains("error", na=False), "Count Error"] = df_deposit["Count"]

df_deposit.loc[df_deposit["status"].str.contains("timeout", na=False), "Count Timeout"] = df_deposit["Count"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_deposit.loc[df_deposit["status"] == "completed", "Count Success"] = df_deposit["Count"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_deposit.loc[df_deposit["status"].str.contains("error", na=False), "Count Error"] = df_deposit["Count"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_depos

In [30]:
df_deposit.loc[df_deposit["status"].str.contains("error", na=False)].shape[0] + df_deposit.loc[df_deposit["status"].str.contains("timeout", na=False)].shape[0] + df_deposit.loc[df_deposit["status"] == "completed", "Count Success"].shape[0]

950885

In [31]:
df_deposit["Count"].sum()


np.int64(26643389)

In [32]:
# 1. Define your columns
group_cols = ["Date", "providerName", "channel_type", "type", "Country", "account_group", "group_re", "Hour"]

sum_cols = [
    "Count", "Count Success", "Count Error", "Count Timeout",
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above'
]

# 2. Perform Group By and Sum
df_grouped_dep = df_deposit.groupby(group_cols)[sum_cols].sum().reset_index()

# 3. (Optional) Inspect the result
print(df_grouped_dep.head())

        Date providerName   channel_type     type Country account_group  \
0 2025-11-01        aipay        qr-code  DEPOSIT      TH          KZG1   
1 2025-11-01    apollopay       gcash-qr  DEPOSIT      PH          96G1   
2 2025-11-01     cloudpay  bank-transfer  DEPOSIT      TH          BLG1   
3 2025-11-01        dppay        qr-code  DEPOSIT      TH          96G1   
4 2025-11-01        dppay        qr-code  DEPOSIT      TH          BLG1   

  group_re           Hour  Count  Count Success  Count Error  Count Timeout  \
0      KZO  14:00 - 14:59      1              1            0              0   
1      KZP  21:00 - 21:59      1              1            0              0   
2      KZP  06:00 - 06:59      1              1            0              0   
3      KZP  10:00 - 10:59      1              1            0              0   
4      KZP  22:00 - 22:59      4              4            0              0   

                             Total_Net_Amount  winsorized_total_time_secon

In [33]:
import pandas as pd
from datetime import timedelta

# --- 1. PREPARE DATA & INDICES ---
# Assumes df_grouped has [Date, Hour, providerName, etc., Count, NetAmount...]

df_grouped_dep['Date'] = pd.to_datetime(df_grouped_dep['Date'])
df_grouped_dep['Week_Index'] = df_grouped_dep['Date'].dt.isocalendar().week.astype(int)

# Helper: Day of Week (0=Mon, 6=Sun)
df_grouped_dep['DayOfWeek'] = df_grouped_dep['Date'].dt.dayofweek

# --- 2. DETERMINE THE CUTOFF (Based on Current Week) ---
print("Identifying Cutoff from Current Week...")

# Identify the Current Week (Highest Index)
current_week_idx = df_grouped_dep['Week_Index'].max()
current_week_data = df_grouped_dep[df_grouped_dep['Week_Index'] == current_week_idx]

if not current_week_data.empty:
    # 1. How far into the week are we? (e.g., Today is Monday = 0)
    cutoff_day_idx = current_week_data['DayOfWeek'].max()
    
    # 2. What is the latest hour on that specific day?
    cutoff_hour = current_week_data.loc[
        current_week_data['DayOfWeek'] == cutoff_day_idx, 'Hour'
    ].max()
    
    print(f" -> Cutoff Point: Day {cutoff_day_idx} (Mon=0) at {cutoff_hour}")
else:
    # Fallback if current week is empty
    cutoff_day_idx = 6 # Sunday
    cutoff_hour = "23:59 - 23:59"
    print(" -> No current data. defaulting to full week.")

# --- 3. CREATE THE 'MARK' COLUMN ---
print("Tagging rows with 'pacing_mark'...")

def get_pacing_mark(row):
    # Rule 1: Always include the current week (it is the reference)
    if row['Week_Index'] == current_week_idx:
        return 'Included'
    
    # Rule 2: For historical weeks, compare Day & Hour
    # A. If the day is EARLIER in the week (e.g. Current is Wed, Row is Mon) -> Keep
    if row['DayOfWeek'] < cutoff_day_idx:
        return 'Included'
    
    # B. If it's the SAME Day, check the Hour
    if row['DayOfWeek'] == cutoff_day_idx:
        if row['Hour'] <= cutoff_hour:
            return 'Included'
        else:
            return 'Excluded' # Same day, but later hour
            
    # C. If the day is LATER in the week (e.g. Current is Mon, Row is Tue) -> Exclude
    if row['DayOfWeek'] > cutoff_day_idx:
        return 'Excluded'

    return 'Excluded'

# Apply the logic
df_grouped_dep['pacing_mark'] = df_grouped_dep.apply(get_pacing_mark, axis=1)

# --- 4. GENERATE LABELS ---
print("Generating Week Labels...")
# Week start (Monday)
df_grouped_dep['Week_Start'] = df_grouped_dep['Date'] - pd.to_timedelta(df_grouped_dep['Date'].dt.dayofweek, unit='D')

# Week end (Sunday)
df_grouped_dep['Week_End'] = df_grouped_dep['Week_Start'] + pd.to_timedelta(6, unit='D')

# Use Week_End for label
df_grouped_dep['Week_Label'] = (
    df_grouped_dep['Week_End'].dt.day.astype(str) + " " +
    df_grouped_dep['Week_End'].dt.strftime('%b %y')
)

# --- 5. AGGREGATE (INCLUDE 'pacing_mark' IN GROUP BY) ---
print("Aggregating...")

final_group_cols = [
    "Week_Label", 
    "Week_Index", 
    "pacing_mark", # <--- CRITICAL: Keep this so you can filter in Looker
    "providerName", "channel_type", "type", "Country", "account_group", "group_re"
]
sum_cols = [
    "Count", "Count Success", "Count Error", "Count Timeout",
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above'
]

df_final_dep = df_grouped_dep.groupby(final_group_cols)[sum_cols].sum().reset_index()

# Sort DESC so Week 49 is top
df_final_dep = df_final_dep.sort_values(by=['Week_Index', 'pacing_mark'], ascending=[False, True])

# print(df_final[['Week_Label', 'pacing_mark', 'Count']].head())

# write_df_to_gsheet(df_final)



Identifying Cutoff from Current Week...
 -> Cutoff Point: Day 6 (Mon=0) at 23:00 - 23:59
Tagging rows with 'pacing_mark'...
Generating Week Labels...
Aggregating...


In [34]:
df_final_dep.loc[df_final_dep["Week_Label"].str.contains("26"), "Week_Index"] += 52

In [35]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,Count,Count Success,Count Error,Count Timeout,Total_Net_Amount,winsorized_total_time_seconds,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1537,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,TH,96G1,KZP,1306,979,27,300,1074492.000000000000000000000,77288.0,771,101,59,48
1538,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZO,6795,5183,107,1505,2549155.390000000000000000000,600258.0,2480,1274,1019,410
1539,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZP,1222,850,16,356,434389.0000000000000000000000,110474.0,374,210,168,98
1540,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,TH,KZG2,KZO,6413,4899,206,1308,1987161.600000000000000000000,523034.0,2598,1121,853,327
1541,28 Dec 25,52,Included,aipay,qr-code,DEPOSIT,TH,Others,KZP,1641,1206,44,391,599058.0000000000000000000000,129771.0,606,303,221,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2087,4 Jan 26,53,Included,xqpay-bd,nagad,DEPOSIT,BD,KZG1,KZO,3358,2382,3,973,790149.0000000000000000000000,163595.0,2015,203,98,66
2088,4 Jan 26,53,Included,xqpay-bd,rocket,DEPOSIT,BD,KZG1,KZO,125,60,0,65,17605.00000000000000000000000,7951.0,26,19,9,6
2089,4 Jan 26,53,Included,ydpay-mx,clabe,DEPOSIT,MX,KZG1,KZO,446,338,3,105,27961.67000000000000000000000,45984.0,137,64,58,79
2090,4 Jan 26,53,Included,yzpay-th,bank-transfer,DEPOSIT,TH,BLG1,KZP,192,84,0,108,49513.07000000000000000000000,38242.0,0,10,7,67


In [36]:
df_final_dep.columns = ['Week_Label', 'Week_Index', 'pacing_mark','providerName', 'channel_type', 'type', 'Country', 'account_group', "group_re",
        'DP Count', "Count Success", "Count Error", "Count Timeout", 'Total Amount DP',
       'Avg DP Time',     'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
       'Count_02m01s_to_03m00s', 'Count_03m00s_Above']

# cond1 = df_final_dep["Week_Index"] == (df_final_dep["Week_Index"].max() - 1)
# cond2 = df_final_dep["pacing_mark"] == "Excluded"
# df_final_dep.loc[~(cond1 & cond2), "pacing_mark"] = None

df_final_dep["Week_Index"] = df_final_dep["Week_Index"] - df_final_dep["Week_Index"].max()

# df_final_dep = df_final_dep[df_final_dep["Week_Index"] != 0 ]
# df_final_dep["Week_Index"] = df_final_dep["Week_Index"] + 1
# df_final_dep = df_final_dep[df_final_dep["Week_Index"] >= -5]

In [37]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,Count Error,Count Timeout,Total Amount DP,Avg DP Time,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1537,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,96G1,KZP,1306,979,27,300,1074492.000000000000000000000,77288.0,771,101,59,48
1538,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZO,6795,5183,107,1505,2549155.390000000000000000000,600258.0,2480,1274,1019,410
1539,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZP,1222,850,16,356,434389.0000000000000000000000,110474.0,374,210,168,98
1540,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,KZG2,KZO,6413,4899,206,1308,1987161.600000000000000000000,523034.0,2598,1121,853,327
1541,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,Others,KZP,1641,1206,44,391,599058.0000000000000000000000,129771.0,606,303,221,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2087,4 Jan 26,-4,Included,xqpay-bd,nagad,DEPOSIT,BD,KZG1,KZO,3358,2382,3,973,790149.0000000000000000000000,163595.0,2015,203,98,66
2088,4 Jan 26,-4,Included,xqpay-bd,rocket,DEPOSIT,BD,KZG1,KZO,125,60,0,65,17605.00000000000000000000000,7951.0,26,19,9,6
2089,4 Jan 26,-4,Included,ydpay-mx,clabe,DEPOSIT,MX,KZG1,KZO,446,338,3,105,27961.67000000000000000000000,45984.0,137,64,58,79
2090,4 Jan 26,-4,Included,yzpay-th,bank-transfer,DEPOSIT,TH,BLG1,KZP,192,84,0,108,49513.07000000000000000000000,38242.0,0,10,7,67


## WITHDRAWL DATA

In [38]:
df_final_dep

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,Count Error,Count Timeout,Total Amount DP,Avg DP Time,Count_01m30s_Below,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above
1537,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,96G1,KZP,1306,979,27,300,1074492.000000000000000000000,77288.0,771,101,59,48
1538,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZO,6795,5183,107,1505,2549155.390000000000000000000,600258.0,2480,1274,1019,410
1539,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZP,1222,850,16,356,434389.0000000000000000000000,110474.0,374,210,168,98
1540,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,KZG2,KZO,6413,4899,206,1308,1987161.600000000000000000000,523034.0,2598,1121,853,327
1541,28 Dec 25,-5,Included,aipay,qr-code,DEPOSIT,TH,Others,KZP,1641,1206,44,391,599058.0000000000000000000000,129771.0,606,303,221,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2087,4 Jan 26,-4,Included,xqpay-bd,nagad,DEPOSIT,BD,KZG1,KZO,3358,2382,3,973,790149.0000000000000000000000,163595.0,2015,203,98,66
2088,4 Jan 26,-4,Included,xqpay-bd,rocket,DEPOSIT,BD,KZG1,KZO,125,60,0,65,17605.00000000000000000000000,7951.0,26,19,9,6
2089,4 Jan 26,-4,Included,ydpay-mx,clabe,DEPOSIT,MX,KZG1,KZO,446,338,3,105,27961.67000000000000000000000,45984.0,137,64,58,79
2090,4 Jan 26,-4,Included,yzpay-th,bank-transfer,DEPOSIT,TH,BLG1,KZP,192,84,0,108,49513.07000000000000000000000,38242.0,0,10,7,67


In [39]:
# import pandas as pd
# import gspread
# from gspread_dataframe import set_with_dataframe
# from google.oauth2.service_account import Credentials

# # --- 1. CONFIGURATION ---
# CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
# SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
# WORKSHEET_NAME = 'Withdrawal Data'

# def write_df_to_gsheet(dataframe):
#     # --- 2. AUTHENTICATION ---
#     # Define the scope (permissions) required
#     scopes = [
#         'https://www.googleapis.com/auth/spreadsheets',
#         'https://www.googleapis.com/auth/drive'
#     ]
    
#     # Authenticate using the JSON key file
#     credentials = Credentials.from_service_account_file(
#         CREDENTIALS_FILE, 
#         scopes=scopes
#     )
#     gc = gspread.authorize(credentials)

#     # --- 3. CONNECT TO SHEET ---
#     print(f"Opening sheet ID: {SHEET_ID}...")
#     sh = gc.open_by_key(SHEET_ID)
    
#     try:
#         worksheet = sh.worksheet(WORKSHEET_NAME)
#     except gspread.WorksheetNotFound:
#         print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
#         worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

#     # --- 4. WRITE DATA ---
#     print("Clearing old data...")
#     worksheet.clear()  # Removes old data so rows don't overlap
    
#     print(f"Writing {len(dataframe)} rows to Google Sheets...")
#     # 'set_with_dataframe' handles headers and NaN values automatically
#     set_with_dataframe(worksheet, dataframe)
    
#     print("Done!")

# # --- Usage ---
# if __name__ == "__main__":
#     # Assuming 'df' exists from your previous code
#     # If not, uncomment the line below to test with dummy data:
#     # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
#     write_df_to_gsheet(df_withdraw)

In [40]:
df_withdraw[["Date", "providerName", "channel_type", "type" , "Country", "account_group"]]

Unnamed: 0,Date,providerName,channel_type,type,Country,account_group
13,2026-01-12,dippay,bank-transfer,WITHDRAW,TH,KZG2
14,2026-01-12,dippay,bank-transfer,WITHDRAW,TH,Others
18,2025-12-21,u2cpay,easypaisa,WITHDRAW,PK,KZG1
21,2025-12-08,mmpay,paymaya,WITHDRAW,PH,96G1
24,2026-01-23,goalpay,paymaya,WITHDRAW,PH,KZ
...,...,...,...,...,...,...
1284217,2026-01-26,dppay,bank-transfer,WITHDRAW,TH,BLG1
1284218,2026-01-26,toppay,bank-transfer,WITHDRAW,TH,96G1
1284219,2026-01-26,etpay,bank-transfer,WITHDRAW,TH,96G2
1284223,2026-01-26,dppay,bank-transfer,WITHDRAW,TH,KZG1


In [41]:
df_withdraw = df_withdraw[df_withdraw["Date"] >= "2025-11-01"]

In [42]:
df_withdraw[["Count", "Total_Net_Amount", "winsorized_total_time_seconds", "Count_03m00s_Below", "Count_03m31s_to_05m00s", "Count_05m00s_to_10m00s", 'Count_10m00s_Above']]

Unnamed: 0,Count,Total_Net_Amount,winsorized_total_time_seconds,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
13,18,39062.00000000000000000000000000000000000000,3467.0,9,6,3,0
14,18,39062.00000000000000000000000000000000000000,3467.0,9,6,3,0
18,5,3904.00000000000000000000000000000000000000,272.0,5,0,0,0
21,5,1100.00000000000000000000000000000000000000,393.0,5,0,0,0
24,10,4800.00000000000000000000000000000000000000,879.0,10,0,0,0
...,...,...,...,...,...,...,...
1284217,9,7005.00000000000000000000000000000000000000,1159.0,7,1,1,0
1284218,5,6724.00000000000000000000000000000000000000,411.0,5,0,0,0
1284219,3,2424.00000000000000000000000000000000000000,195.0,3,0,0,0
1284223,8,4009.00000000000000000000000000000000000000,2152.0,4,1,2,1


In [43]:
# 1. Define your columns
group_cols = ["Date", "providerName", "channel_type", "type", "Country", "account_group", "group_re", "Hour"]

sum_cols = [
    "Count", 
    "Total_Net_Amount", 
    "winsorized_total_time_seconds", 
    "Count_03m00s_Below", 
    "Count_03m31s_to_05m00s", 
    "Count_05m00s_to_10m00s", 
    "Count_10m00s_Above"
]

# 2. Perform Group By and Sum
df_grouped = df_withdraw.groupby(group_cols)[sum_cols].sum().reset_index()

# 3. (Optional) Inspect the result
print(df_grouped.head())

        Date providerName   channel_type      type Country account_group  \
0 2025-11-02        aipay  bank-transfer  WITHDRAW      TH          96G1   
1 2025-11-02        aipay  bank-transfer  WITHDRAW      TH          96G1   
2 2025-11-02        aipay  bank-transfer  WITHDRAW      TH          96G1   
3 2025-11-02        aipay  bank-transfer  WITHDRAW      TH          KZG1   
4 2025-11-02        aipay  bank-transfer  WITHDRAW      TH          KZG1   

  group_re           Hour  Count                             Total_Net_Amount  \
0      KZP  16:00 - 16:59      4                48469.00000000000000000000000   
1      KZP  17:00 - 17:59      1  7942.00000000000000000000000000000000000000   
2      KZP  18:00 - 18:59     10                39658.00000000000000000000000   
3      KZO  06:00 - 06:59      1  2000.00000000000000000000000000000000000000   
4      KZO  07:00 - 07:59      3                6067.000000000000000000000000   

   winsorized_total_time_seconds  Count_03m00s_Below  Co

In [44]:
import pandas as pd
from datetime import timedelta

# --- 1. PREPARE DATA & INDICES ---
# Assumes df_grouped has [Date, Hour, providerName, etc., Count, NetAmount...]

print("Calculating Week Indices...")
df_grouped['Date'] = pd.to_datetime(df_grouped['Date'])
df_grouped['Week_Index'] = df_grouped['Date'].dt.isocalendar().week.astype(int)

# Helper: Day of Week (0=Mon, 6=Sun)
df_grouped['DayOfWeek'] = df_grouped['Date'].dt.dayofweek

# --- 2. DETERMINE THE CUTOFF (Based on Current Week) ---
print("Identifying Cutoff from Current Week...")

# Identify the Current Week (Highest Index)
current_week_idx = df_grouped['Week_Index'].max()
current_week_data = df_grouped[df_grouped['Week_Index'] == current_week_idx]

if not current_week_data.empty:
    # 1. How far into the week are we? (e.g., Today is Monday = 0)
    cutoff_day_idx = current_week_data['DayOfWeek'].max()
    
    # 2. What is the latest hour on that specific day?
    cutoff_hour = current_week_data.loc[
        current_week_data['DayOfWeek'] == cutoff_day_idx, 'Hour'
    ].max()
    
    print(f" -> Cutoff Point: Day {cutoff_day_idx} (Mon=0) at {cutoff_hour}")
else:
    # Fallback if current week is empty
    cutoff_day_idx = 6 # Sunday
    cutoff_hour = "23:59 - 23:59"
    print(" -> No current data. defaulting to full week.")

# --- 3. CREATE THE 'MARK' COLUMN ---
print("Tagging rows with 'pacing_mark'...")

def get_pacing_mark(row):
    # Rule 1: Always include the current week (it is the reference)
    if row['Week_Index'] == current_week_idx:
        return 'Included'
    
    # Rule 2: For historical weeks, compare Day & Hour
    # A. If the day is EARLIER in the week (e.g. Current is Wed, Row is Mon) -> Keep
    if row['DayOfWeek'] < cutoff_day_idx:
        return 'Included'
    
    # B. If it's the SAME Day, check the Hour
    if row['DayOfWeek'] == cutoff_day_idx:
        if row['Hour'] <= cutoff_hour:
            return 'Included'
        else:
            return 'Excluded' # Same day, but later hour
            
    # C. If the day is LATER in the week (e.g. Current is Mon, Row is Tue) -> Exclude
    if row['DayOfWeek'] > cutoff_day_idx:
        return 'Excluded'

    return 'Excluded'

# Apply the logic
df_grouped['pacing_mark'] = df_grouped.apply(get_pacing_mark, axis=1)

# --- 4. GENERATE LABELS ---
print("Generating Week Labels...")
# Week start (Monday)
df_grouped['Week_Start'] = df_grouped['Date'] - pd.to_timedelta(df_grouped['Date'].dt.dayofweek, unit='D')

# Week end (Sunday)
df_grouped['Week_End'] = df_grouped['Week_Start'] + pd.to_timedelta(6, unit='D')

# Use Week_End for label
df_grouped['Week_Label'] = (
    df_grouped['Week_End'].dt.day.astype(str) + " " +
    df_grouped['Week_End'].dt.strftime('%b %y')
)


# --- 5. AGGREGATE (INCLUDE 'pacing_mark' IN GROUP BY) ---
print("Aggregating...")

final_group_cols = [
    "Week_Label", 
    "Week_Index", 
    "pacing_mark", # <--- CRITICAL: Keep this so you can filter in Looker
    "providerName", "channel_type", "type", "Country", "account_group", "group_re"
]

sum_cols = [
    "Count", "Total_Net_Amount", "winsorized_total_time_seconds", 
    "Count_03m00s_Below", "Count_03m31s_to_05m00s", 
    "Count_05m00s_to_10m00s", "Count_10m00s_Above"
]

df_final = df_grouped.groupby(final_group_cols)[sum_cols].sum().reset_index()

# Sort DESC so Week 49 is top
df_final = df_final.sort_values(by=['Week_Index', 'pacing_mark'], ascending=[False, True])

print(df_final[['Week_Label', 'pacing_mark', 'Count']].head())

# write_df_to_gsheet(df_final)

Calculating Week Indices...
Identifying Cutoff from Current Week...
 -> Cutoff Point: Day 6 (Mon=0) at 23:00 - 23:59
Tagging rows with 'pacing_mark'...
Generating Week Labels...
Aggregating...
     Week_Label pacing_mark  Count
1051  28 Dec 25    Included    418
1052  28 Dec 25    Included    347
1053  28 Dec 25    Included     64
1054  28 Dec 25    Included    130
1055  28 Dec 25    Included     31


In [45]:
df_final.loc[df_final["Week_Label"].str.contains("26"), "Week_Index"] += 52

In [46]:
df_final.columns = ['Week_Label', 'Week_Index', 'pacing_mark','providerName', 'channel_type', 'type',
       'Country', 'account_group', 'group_re','WD Count', 'Total Amount WD',
       'Avg WD Time', 'Count_03m00s_Below',
       'Count_03m31s_to_05m00s', 'Count_05m00s_to_10m00s',
       'Count_10m00s_Above']
       
# cond1 = df_final["Week_Index"] == (df_final["Week_Index"].max() - 1)
# cond2 = df_final["pacing_mark"] == "Excluded"
# df_final.loc[~(cond1 & cond2), "pacing_mark"] = None

df_final["Week_Index"] = df_final["Week_Index"] - df_final["Week_Index"].max()

# df_final = df_final[df_final["Week_Index"] != 0 ]
# df_final["Week_Index"] = df_final["Week_Index"] + 1
# df_final = df_final[df_final["Week_Index"] >= -5]

In [47]:
df_final

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,WD Count,Total Amount WD,Avg WD Time,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
1051,28 Dec 25,-5,Included,aipay,bank-transfer,WITHDRAW,TH,96G1,KZP,418,990764.0000000000000000000000,68219.0,334,3,47,34
1052,28 Dec 25,-5,Included,aipay,bank-transfer,WITHDRAW,TH,KZG1,KZO,347,2512641.000000000000000000000,165229.0,216,14,32,85
1053,28 Dec 25,-5,Included,aipay,bank-transfer,WITHDRAW,TH,KZG1,KZP,64,567532.0000000000000000000000,35485.0,40,0,5,19
1054,28 Dec 25,-5,Included,aipay,bank-transfer,WITHDRAW,TH,KZG2,KZO,130,890719.0000000000000000000000,53442.0,72,1,15,42
1055,28 Dec 25,-5,Included,aipay,bank-transfer,WITHDRAW,TH,Others,KZP,31,114278.0000000000000000000000,11765.0,18,0,5,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1398,4 Jan 26,-4,Included,xpay-pk,easypaisa,WITHDRAW,PK,KZG1,KZO,325,1071114.000000000000000000000,22511.0,311,5,5,4
1399,4 Jan 26,-4,Included,xqpay-bd,bkash,WITHDRAW,BD,KZG1,KZO,1185,1164546.000000000000000000000,70677.0,1166,5,8,6
1400,4 Jan 26,-4,Included,xqpay-bd,nagad,WITHDRAW,BD,KZG1,KZO,952,864338.0000000000000000000000,48122.0,944,3,2,3
1401,4 Jan 26,-4,Included,xqpay-bd,rocket,WITHDRAW,BD,KZG1,KZO,12,6585.000000000000000000000000,2600.0,6,2,3,1


In [48]:
df_total_final = pd.concat([df_final_dep, df_final], axis=0)

In [49]:
df_total_prev = df_total_final[df_total_final["Week_Index"] < 0]
df_total_prev["Week_Index"] = df_total_prev["Week_Index"] + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_total_prev["Week_Index"] = df_total_prev["Week_Index"] + 1


In [50]:
df_total_prev.columns = ['Week_Label', 'Week_Index', 'pacing_mark', 'providerName',
       'channel_type', 'type', 'Country', 'account_group', 'group_re','DP Count Prev', 'Count Success Prev',
       'Count Error Prev', 'Count Timeout Prev',
       'Total Amount DP Prev', 'Avg DP Time Prev', 'Count_01m30s_Below Prev',
       'Count_01m31s_to_02m00s Prev', 'Count_02m01s_to_03m00s Prev',
       'Count_03m00s_Above Prev', 'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
       'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
       'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev']

In [51]:
df_total_prev

Unnamed: 0,Week_Label,Week_Index,pacing_mark,providerName,channel_type,type,Country,account_group,group_re,DP Count Prev,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
1537,28 Dec 25,-4,Included,aipay,qr-code,DEPOSIT,TH,96G1,KZP,1306,...,101,59,48,,,,,,,
1538,28 Dec 25,-4,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZO,6795,...,1274,1019,410,,,,,,,
1539,28 Dec 25,-4,Included,aipay,qr-code,DEPOSIT,TH,KZG1,KZP,1222,...,210,168,98,,,,,,,
1540,28 Dec 25,-4,Included,aipay,qr-code,DEPOSIT,TH,KZG2,KZO,6413,...,1121,853,327,,,,,,,
1541,28 Dec 25,-4,Included,aipay,qr-code,DEPOSIT,TH,Others,KZP,1641,...,303,221,76,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1398,4 Jan 26,-3,Included,xpay-pk,easypaisa,WITHDRAW,PK,KZG1,KZO,,...,,,,325,1071114.000000000000000000000,22511.0,311,5,5,4
1399,4 Jan 26,-3,Included,xqpay-bd,bkash,WITHDRAW,BD,KZG1,KZO,,...,,,,1185,1164546.000000000000000000000,70677.0,1166,5,8,6
1400,4 Jan 26,-3,Included,xqpay-bd,nagad,WITHDRAW,BD,KZG1,KZO,,...,,,,952,864338.0000000000000000000000,48122.0,944,3,2,3
1401,4 Jan 26,-3,Included,xqpay-bd,rocket,WITHDRAW,BD,KZG1,KZO,,...,,,,12,6585.000000000000000000000000,2600.0,6,2,3,1


In [52]:
index_cols = [
    'Week_Label', 'Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', 'group_re'
]

metric_cols = [
    'DP Count',  "Count Success", "Count Error", "Count Timeout", 'Total Amount DP', 'Avg DP Time',
    'Count_01m30s_Below', 'Count_01m31s_to_02m00s',
    'Count_02m01s_to_03m00s', 'Count_03m00s_Above',
    'WD Count', 'Total Amount WD', 'Avg WD Time',
    'Count_03m00s_Below', 'Count_03m31s_to_05m00s',
    'Count_05m00s_to_10m00s', 'Count_10m00s_Above'
]

df_grouped_final = (
    df_total_final
    .groupby(index_cols)[metric_cols]
    .sum()
    .reset_index()
)

In [53]:
df_grouped_final = df_grouped_final.sort_values(by=['Week_Index', 'type'], ascending=[False, True])

In [54]:
index_cols = [
    'Week_Label', 'Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', "group_re"
]

metric_cols_prev = [
    'DP Count Prev',  "Count Success Prev", "Count Error Prev", "Count Timeout Prev", 'Total Amount DP Prev', 'Avg DP Time Prev',
    'Count_01m30s_Below Prev', 'Count_01m31s_to_02m00s Prev',
    'Count_02m01s_to_03m00s Prev', 'Count_03m00s_Above Prev',
    'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
    'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
    'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev'
]

df_grouped_prev = (
    df_total_prev
    .groupby(index_cols)[metric_cols_prev]
    .sum()
    .reset_index()
)

In [55]:
df_grouped_final

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count,Count Success,...,Count_01m31s_to_02m00s,Count_02m01s_to_03m00s,Count_03m00s_Above,WD Count,Total Amount WD,Avg WD Time,Count_03m00s_Below,Count_03m31s_to_05m00s,Count_05m00s_to_10m00s,Count_10m00s_Above
0,1 Feb 26,0,all2pay-pk,easypaisa,DEPOSIT,PK,KZG1,KZO,6,1,...,0,0,0,0,0,0.0,0,0,0,0
2,1 Feb 26,0,all2pay-pk,jazz-cash,DEPOSIT,PK,KZG1,KZO,594,387,...,11,0,1,0,0,0.0,0,0,0,0
8,1 Feb 26,0,apollopay,gcash-qr,DEPOSIT,PH,96G1,KZP,22,20,...,0,0,0,0,0,0.0,0,0,0,0
9,1 Feb 26,0,apollopay,gcash-qr,DEPOSIT,PH,BLG1,KZP,271,242,...,13,4,4,0,0,0.0,0,0,0,0
10,1 Feb 26,0,apollopay,gcash-qr,DEPOSIT,PH,KZ,KZO,2344,2107,...,95,71,55,0,0,0.0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1644,2 Nov 25,-13,wingpay,paymaya,WITHDRAW,PH,KZ,KZO,0,0,...,0,0,0,49,49651.00000000000000000000000,2766.0,49,0,0,0
1645,2 Nov 25,-13,wingpay,paymaya,WITHDRAW,PH,KZG1,KZO,0,0,...,0,0,0,20,9126.000000000000000000000000,1318.0,20,0,0,0
1650,2 Nov 25,-13,xqpay-bd,bkash,WITHDRAW,BD,KZG1,KZO,0,0,...,0,0,0,121,164659.0000000000000000000000,4750.0,120,1,0,0
1652,2 Nov 25,-13,xqpay-bd,nagad,WITHDRAW,BD,KZG1,KZO,0,0,...,0,0,0,95,116255.0000000000000000000000,4769.0,93,1,0,1


In [56]:
df_grouped_prev

Unnamed: 0,Week_Label,Week_Index,providerName,channel_type,type,Country,account_group,group_re,DP Count Prev,Count Success Prev,...,Count_01m31s_to_02m00s Prev,Count_02m01s_to_03m00s Prev,Count_03m00s_Above Prev,WD Count Prev,Total Amount WD Prev,Avg WD Time Prev,Count_03m00s_Below Prev,Count_03m31s_to_05m00s Prev,Count_05m00s_to_10m00s Prev,Count_10m00s_Above Prev
0,11 Jan 26,-2,aipay,bank-transfer,WITHDRAW,TH,96G1,KZP,0,0,...,0,0,0,413,2378929.000000000000000000000,77321.0,355,12,20,26
1,11 Jan 26,-2,aipay,bank-transfer,WITHDRAW,TH,KZG1,KZO,0,0,...,0,0,0,176,1075439.000000000000000000000,76617.0,129,6,1,40
2,11 Jan 26,-2,aipay,bank-transfer,WITHDRAW,TH,KZG1,KZP,0,0,...,0,0,0,35,200311.0000000000000000000000,12410.0,28,1,0,6
3,11 Jan 26,-2,aipay,bank-transfer,WITHDRAW,TH,KZG2,KZO,0,0,...,0,0,0,306,1699666.000000000000000000000,96152.0,230,19,11,46
4,11 Jan 26,-2,aipay,bank-transfer,WITHDRAW,TH,Others,KZP,0,0,...,0,0,0,76,476249.0000000000000000000000,28871.0,52,7,5,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3894,9 Nov 25,-11,xqpay-bd,nagad,WITHDRAW,BD,KZG1,KZO,0,0,...,0,0,0,740,779886.0000000000000000000000,38021.0,730,2,2,6
3895,9 Nov 25,-11,xqpay-bd,rocket,DEPOSIT,BD,KZG1,KZO,312,189,...,40,58,23,0,0,0.0,0,0,0,0
3896,9 Nov 25,-11,xqpay-bd,rocket,WITHDRAW,BD,KZG1,KZO,0,0,...,0,0,0,31,31609.00000000000000000000000,2155.0,28,1,2,0
3897,9 Nov 25,-11,zenpay,pix,DEPOSIT,BR,KZG1,KZO,82,65,...,4,4,0,0,0,0.0,0,0,0,0


In [57]:
df_grouped_concat = df_grouped_final.merge(df_grouped_prev[['Week_Index', 'providerName',
    'channel_type', 'type', 'Country', 'account_group', 'group_re','DP Count Prev',  "Count Success Prev", "Count Error Prev", "Count Timeout Prev",'Total Amount DP Prev', 'Avg DP Time Prev',
    'Count_01m30s_Below Prev', 'Count_01m31s_to_02m00s Prev',
    'Count_02m01s_to_03m00s Prev', 'Count_03m00s_Above Prev',
    'WD Count Prev', 'Total Amount WD Prev', 'Avg WD Time Prev',
    'Count_03m00s_Below Prev', 'Count_03m31s_to_05m00s Prev',
    'Count_05m00s_to_10m00s Prev', 'Count_10m00s_Above Prev']], how = 'outer', on = ['Week_Index', 'providerName',
       'channel_type', 'type', 'Country', 'account_group', 'group_re'])

In [58]:
list_data = df_grouped_concat[df_grouped_concat["Week_Label"].notna()][["Week_Label", "Week_Index"]].drop_duplicates()

In [59]:
df_grouped_concat = df_grouped_concat[[i for i in df_grouped_concat.columns if "Week_Label" not in i]].merge(list_data,  how = "left")

In [60]:
df_grouped_concat["Week_Label"] = df_grouped_concat["Week_Label"].astype(str)

In [61]:
df_grouped_concat.groupby(['Week_Label', 'Week_Index']).agg({"WD Count" : "sum"
                                                             , "WD Count Prev": "sum"}).reset_index().sort_values(by=['Week_Index'], ascending=[False])

Unnamed: 0,Week_Label,Week_Index,WD Count,WD Count Prev
0,1 Feb 26,0,40179,528664
8,25 Jan 26,-1,528664,525321
4,18 Jan 26,-2,525321,485822
1,11 Jan 26,-3,485822,504188
11,4 Jan 26,-4,504188,495814
9,28 Dec 25,-5,495814,432240
6,21 Dec 25,-6,432240,400276
2,14 Dec 25,-7,400276,433285
12,7 Dec 25,-8,433285,395136
10,30 Nov 25,-9,395136,379139


In [62]:
df_grouped_concat = df_grouped_concat[["Week_Label"] + [i for i in df_grouped_concat.columns if i != "Week_Label"]]

In [63]:
df_grouped_concat = df_grouped_concat[df_grouped_concat["Week_Index"] >= -6]

In [64]:
df_grouped_concat["group_re"] = df_grouped_concat["group_re"].str.replace("KZG", "KZO")

In [65]:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
from google.oauth2.service_account import Credentials

# --- 1. CONFIGURATION ---
CREDENTIALS_FILE = 'gen-lang-client-0209575391-96d90a513b0b.json'
SHEET_ID = '1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4'
WORKSHEET_NAME = 'WoW Data'

def write_df_to_gsheet(dataframe):
    # --- 2. AUTHENTICATION ---
    # Define the scope (permissions) required
    scopes = [
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive'
    ]
    
    # Authenticate using the JSON key file
    credentials = Credentials.from_service_account_file(
        CREDENTIALS_FILE, 
        scopes=scopes
    )
    gc = gspread.authorize(credentials)

    # --- 3. CONNECT TO SHEET ---
    print(f"Opening sheet ID: {SHEET_ID}...")
    sh = gc.open_by_key(SHEET_ID)
    
    try:
        worksheet = sh.worksheet(WORKSHEET_NAME)
    except gspread.WorksheetNotFound:
        print(f"Worksheet '{WORKSHEET_NAME}' not found. Creating it...")
        worksheet = sh.add_worksheet(title=WORKSHEET_NAME, rows="100", cols="20")

    # --- 4. WRITE DATA ---
    print("Clearing old data...")
    worksheet.clear()  # Removes old data so rows don't overlap
    
    print(f"Writing {len(dataframe)} rows to Google Sheets...")
    # 'set_with_dataframe' handles headers and NaN values automatically
    set_with_dataframe(worksheet, dataframe)
    
    print("Done!")

# --- Usage ---
if __name__ == "__main__":
    # Assuming 'df' exists from your previous code
    # If not, uncomment the line below to test with dummy data:
    # df = pd.DataFrame({'Date': ['2023-01-01'], 'Amount': [100]})
    
    write_df_to_gsheet(df_grouped_concat.sort_values(by=['Week_Index'], ascending=[False]))

Opening sheet ID: 1Nq9u4bg0tvLnUutVh2TcxXOxe-G2E65kxy_pbJ8pce4...
Clearing old data...
Writing 2422 rows to Google Sheets...
Done!
