In [None]:
import os, sys
sys.path.append('../utils')
import pandas as pd
import numpy as np
from queryHelper import prodFetch, adbFetch
from databaseHelper import *
from sheetHelper import *
from datetime import timedelta
import psycopg2
from datetime import datetime
from openpyxl import Workbook

In [None]:
# Driver-raised tickets for month '2025-04', limited to the battery-status
# dispute issue or the Navigation / Battery / Meter Issue categories, and to
# the Inbound, driverApp and Whatsapp sources. complainant_id is aliased to
# driverId, which is the key used when this frame is merged with dfPenalty.
_tickets_sql = """ 
    SELECT 
        month,
        date,
        complainant_id as driverId,
        category_name,
        issue_name
    FROM ticketModelNew 
    WHERE 
        month IN ('2025-04') 
        AND (category_issue IN (
            'Status Change - Dispute - Driver Submitted Battery but status is Active'
        )
        OR category_name IN (
            'Navigation', 
            'Battery',
            'Meter Issue'
        ))
        AND source IN ('Inbound', 'driverApp', 'Whatsapp')
        AND complainant_type = 'driver'
"""

dfTickets = adbFetch(_tickets_sql)

dfTickets

In [None]:
# One row per (driverId, date) from dailyDriversHistories on or after
# 2025-04-01, non-deleted, driver ids starting with 'D'. penaltyWallet is
# divided by 100 in SQL and exposed as pendingPenalty.
# NOTE(review): the /100 looks like a minor-unit to major-unit conversion - confirm.
# The query text is split across adjacent literals; the concatenated string is
# identical to the original single-line query.
_daily_penalty_sql = (
    "select driverId, date, penaltyWallet/100 as pendingPenalty "
    "from dailyDriversHistories force index(dailyDriversHistories_date_driverId_unique) "
    "where date >= '20250401' and deletedAt is NULL and driverId like 'D%' "
)

dfPenalty = adbFetch(_daily_penalty_sql)

dfPenalty

In [None]:
# Bucket each driver-day's pending penalty into a cohort label.
# With right=True the intervals are (-1, 0], (0, 100], (100, 500], (500, inf);
# a value <= -1 falls outside every bin and gets a NaN cohort.
bins = [-1, 0, 100, 500, float('inf')]
labels = ['0', '1-100', '101-500', '>500']

# The original dropped 'pendingPenalty' in place, so running this cell a second
# time raised KeyError inside pd.cut. The guard makes a re-run a no-op, and the
# frame is rebuilt instead of mutated so the result is the same on every run.
if 'pendingPenalty' in dfPenalty.columns:
    dfPenalty = (
        dfPenalty
        .assign(
            penalty_cohort=lambda frame: pd.cut(
                frame['pendingPenalty'], bins=bins, labels=labels, right=True
            )
        )
        .drop(columns=['pendingPenalty'])
    )

dfPenalty

In [None]:
# Attach the same-day penalty cohort to every ticket. The left join keeps
# tickets that have no matching (driverId, date) penalty row; those rows end up
# with a NaN penalty_cohort.
mergedTickets = pd.merge(
    dfTickets,
    dfPenalty,
    on=['driverId', 'date'],
    how='left',
)

mergedTickets

In [None]:
# Sanity check: tickets whose penalty_cohort is NaN after the left merge.
_cohort_missing = mergedTickets['penalty_cohort'].isna()
mergedTickets.loc[_cohort_missing]

In [None]:
# Transaction rows (month, date, driverId) for client 'BS00' from 2025-04-01 up
# to, but not including, the current date; deleted rows and 'E-2w' vehicles are
# excluded. month is derived in SQL as 'YYYY-MM'.
_txns_sql = """
    select DATE_FORMAT(date, '%Y-%m') AS month, date, driverId
    FROM transactions FORCE INDEX(transaction_date_index)
    WHERE date >= '2025-04-01'
      AND date < CURRENT_DATE()
      AND deletedAt IS NULL
      AND clientId = 'BS00'
      AND vehicleType NOT IN ('E-2w')
"""

dfTxns = prodFetch(_txns_sql)

dfTxns

In [None]:
# Monthly average penalty per driver: AVG(penaltyWallet) / 100 grouped by
# (month, driverId), for non-deleted 'D%' drivers of client 'BS00' that are not
# BaaS drivers, not on 'E-2w' vehicles, and in status active/inactive.
_monthly_penalty_sql = """SELECT
    DATE_FORMAT(date, '%Y-%m') AS month,
    driverId,
    AVG(penaltyWallet) / 100 AS avgPenalty
FROM
    dailyDriversHistories FORCE INDEX(dailyDriversHistories_date_driverId_unique)
WHERE
    date >= '2025-04-01'
    and date < current_date()
    AND deletedAt IS NULL
    AND driverId LIKE 'D%'
    AND isBaasDriver != 1
    AND vehicleType != 'E-2w'
    AND clientId = 'BS00'
    AND status IN ('active', 'inactive')
GROUP BY
    month, driverId"""

dfPenaltyMonth = adbFetch(_monthly_penalty_sql)

# Same cohort edges as the daily bucketing: (-1, 0], (0, 100], (100, 500], (500, inf).
bins = [-1, 0, 100, 500, float('inf')]
labels = ['0', '1-100', '101-500', '>500']

# pop() removes avgPenalty from the frame and hands back the Series, so the
# frame ends up with penalty_cohort in place of the raw average.
_avg_penalty = dfPenaltyMonth.pop('avgPenalty')
dfPenaltyMonth['penalty_cohort'] = pd.cut(_avg_penalty, bins=bins, labels=labels, right=True)

dfPenaltyMonth

In [None]:
# Unique transacting drivers (UDT) per month and penalty cohort.
#
# dfTxns comes from the transactions fetch cell earlier in the notebook. This
# cell used to re-run that same production query (same columns, same filters),
# which doubled the load on prod for no change in the result.
dfMerged = pd.merge(
    dfTxns,
    dfPenaltyMonth,
    on=['driverId', 'month'],
    how='left',
)

# Keep only transactions whose driver has a monthly cohort.
dfMerged = dfMerged.dropna(subset=['penalty_cohort'])

# penalty_cohort is categorical. observed=False is spelled out to keep the
# current behaviour (every cohort appears, unobserved ones with 0) instead of
# depending on the groupby default, which pandas has deprecated and is changing.
dfCohortUDT = (
    dfMerged
    .groupby(['month', 'penalty_cohort'], observed=False)['driverId']
    .nunique()
    .reset_index(name='UDT')
)

# Rows are cohorts, columns are months; missing combinations are shown as 0.
dfCohortUDT_pivot = (
    dfCohortUDT
    .pivot(index='penalty_cohort', columns='month', values='UDT')
    .fillna(0)
    .astype(int)
)

dfCohortUDT_pivot

In [None]:
# Ticket volume per (cohort, month, issue):
#   ticketCount - number of ticket rows
#   UDRT        - number of distinct drivers who raised them
# Tickets with a NaN penalty_cohort are excluded by groupby's default dropna.
#
# penalty_cohort is categorical. observed=False is passed explicitly so the
# result keeps every cohort (zero-filled) regardless of the pandas version;
# relying on the implicit default emits a FutureWarning on recent pandas and
# the default itself is changing.
grouped = (
    mergedTickets
    .groupby(['penalty_cohort', 'month', 'issue_name'], observed=False)
    .agg(
        ticketCount=('driverId', 'count'),
        UDRT=('driverId', 'nunique'),
    )
    .reset_index()
)

grouped

In [None]:
# Wide view: one row per cohort, columns keyed by (metric, month, issue_name).
# pivot_table's default aggregation (mean) is used; missing cells become 0.
pivoted = pd.pivot_table(
    grouped,
    values=['ticketCount', 'UDRT'],
    index='penalty_cohort',
    columns=['month', 'issue_name'],
    fill_value=0,
)

pivoted

In [None]:
# Re-order the columns so that, for every (month, issue), UDRT sits directly
# before ticketCount. A (month, issue) pair is kept only when both metric
# columns exist in the pivot.
_metric_order = ('UDRT', 'ticketCount')

reordered_cols = []
for month in pivoted.columns.levels[1]:
    for issue in pivoted.columns.levels[2]:
        _metric_keys = [(metric, month, issue) for metric in _metric_order]
        if all(key in pivoted.columns for key in _metric_keys):
            reordered_cols.extend(_metric_keys)

pivoted = pivoted[reordered_cols]

pivoted

In [None]:
# Preview the pivot without its cohort index.
#
# The result is intentionally NOT assigned back to `pivoted`. The original
# `pivoted = pivoted.reset_index(drop=True)` threw away the penalty_cohort
# index, so the later `pivoted.reset_index(inplace=True)` produced a column
# called 'index' and the merge `on='penalty_cohort'` raised KeyError when the
# notebook was run top to bottom.
# NOTE(review): the export cell that follows now receives `pivoted` with its
# penalty_cohort index still attached - confirm `write` handles the index the
# way the sheet expects.
pivoted.reset_index(drop=True)

In [None]:
# Push the multi-level pivot to the 'penalty cohort analysis-2' tab of the
# Google Sheet via the project's `write` helper (presumably from sheetHelper).
_pivot_sheet_url = "https://docs.google.com/spreadsheets/d/1mm81UqSeCTx166S2SzqNH63lhQemR5tPm8xK5sLO_6U/edit?gid=1251500267#gid=1251500267"

write(
    _pivot_sheet_url,
    'penalty cohort analysis-2',
    pivoted,
)

In [None]:
# Flatten the (metric, month, issue) column tuples into single labels of the
# form "<month> | <issue> | <metric>", then move the current index into a
# regular column.
#
# Guarded on MultiIndex so the cell is safe to re-run: once the columns are
# flat strings the original tuple unpacking raised ValueError, and every extra
# run of the in-place reset_index added another index column.
if isinstance(pivoted.columns, pd.MultiIndex):
    pivoted.columns = [
        f"{month} | {issue} | {metric}"
        for metric, month, issue in pivoted.columns
    ]
    pivoted = pivoted.reset_index()

pivoted

In [None]:
# Distinct ticket-raising drivers per cohort, joined onto the flattened pivot.
#
# penalty_cohort is categorical. observed=False is passed explicitly so cohorts
# with no tickets still get a row (TotalDrivers = 0) on every pandas version
# rather than depending on the deprecated implicit default.
cohort_driver_counts = (
    mergedTickets
    .groupby('penalty_cohort', observed=False)['driverId']
    .nunique()
    .reset_index(name='TotalDrivers')
)

# Right join: the output has exactly the rows of `pivoted`.
final_df = pd.merge(
    cohort_driver_counts,
    pivoted,
    on='penalty_cohort',
    how='right',
)

final_df

In [None]:
# Add a constant UDT column and move it to the front of the frame.
# NOTE(review): 53499 is hard-coded and the same value is written to every
# cohort row. It looks like a manually copied unique-transacting-driver total;
# confirm the source and whether a per-cohort figure (see dfCohortUDT_pivot)
# was intended.
final_df = final_df.assign(UDT=53499)
final_df = final_df[['UDT', *final_df.columns.drop('UDT')]]

final_df

In [None]:
# Convert categorical columns to plain strings before the frame is exported.
# The dtype is checked with isinstance(..., pd.CategoricalDtype), which is the
# replacement pandas documents for the deprecated
# pd.api.types.is_categorical_dtype (it emits a deprecation warning on recent
# pandas versions).
for col in final_df.columns:
    if isinstance(final_df[col].dtype, pd.CategoricalDtype):
        final_df[col] = final_df[col].astype(str)

final_df

In [None]:
# Push the final cohort table to the 'penalty cohort analysis' tab of the
# Google Sheet via the project's `write` helper (presumably from sheetHelper).
_final_sheet_url = "https://docs.google.com/spreadsheets/d/1mm81UqSeCTx166S2SzqNH63lhQemR5tPm8xK5sLO_6U/edit?gid=1251500267#gid=1251500267"

write(
    _final_sheet_url,
    'penalty cohort analysis',
    final_df,
)