In [1]:
import pandas as pd

# ✅ Step 1: Load the CSV (from Desktop)
file_path = r'C:\Users\Maaz\Desktop\fuel_utilization.csv'

df = pd.read_csv(file_path)

# ✅ Step 2: Convert Date Columns (if needed)
df['STATEMENT DATE'] = pd.to_datetime(df['STATEMENT DATE'], dayfirst=True)
df['TXN DATE'] = pd.to_datetime(df['TXN DATE'], dayfirst=True)

# ✅ Step 3: Convert TXN TIME (if it includes date + time)
df['TXN TIME'] = pd.to_datetime(df['TXN TIME'], dayfirst=True)

# ✅ Optional: If you only want time (HH:MM:SS)
df['TXN TIME'] = df['TXN TIME'].dt.time

# ✅ Step 4: Preview
print(df.head())


   S. NO.  ACCOUNT #      Billing ID STATEMENT DATE BILLING MONTH  \
0       1   22235046  22235046062025     2025-06-30   June - 2025   
1       2   22235046  22235046062025     2025-06-30   June - 2025   
2       3   22235046  22235046062025     2025-06-30   June - 2025   
3       4   22235046  22235046062025     2025-06-30   June - 2025   
4       5   22235046  22235046062025     2025-06-30   June - 2025   

             CARD #          CARD NAME   TXN DATE  TXN TIME  \
0  7002000300026062  PTCL BZC ABN 6483 2025-06-02  09:53:59   
1  7002000300026062  PTCL BZC ABN 6483 2025-06-11  07:51:33   
2  7002000300026062  PTCL BZC ABN 6483 2025-06-16  13:42:23   
3  7002000300026062  PTCL BZC ABN 6483 2025-06-20  14:20:22   
4  7002000300026062  PTCL BZC ABN 6483 2025-06-24  14:58:33   

                 RETAIL OUTLET        PRODUCT  ODOMETER READING  \
0  J.S ITTEFAQ FILLING STATION  Premier Euro5               NaN   
1  J.S ITTEFAQ FILLING STATION  Premier Euro5               NaN   
2  J.

In [None]:
import mysql.connector
from sqlalchemy import create_engine

# Connect using SQLAlchemy
engine = create_engine('mysql+mysqlconnector://root:your_password@localhost:3306/your_db_name')

# Write to MySQL
df.to_sql('fuel_utilization', con=engine, if_exists='replace', index=False)


In [3]:
# Clean column names
df.columns = df.columns.str.strip() \
                       .str.replace(' ', '_') \
                       .str.replace('.', '', regex=False) \
                       .str.replace('(', '', regex=False) \
                       .str.replace(')', '', regex=False) \
                       .str.replace('#', 'number') \
                       .str.lower()


In [4]:
print(df.head())
print(df.columns.tolist())


   s_no  account_number      billing_id statement_date billing_month  \
0     1        22235046  22235046062025     2025-06-30   June - 2025   
1     2        22235046  22235046062025     2025-06-30   June - 2025   
2     3        22235046  22235046062025     2025-06-30   June - 2025   
3     4        22235046  22235046062025     2025-06-30   June - 2025   
4     5        22235046  22235046062025     2025-06-30   June - 2025   

        card_number          card_name   txn_date  txn_time  \
0  7002000300026062  PTCL BZC ABN 6483 2025-06-02  09:53:59   
1  7002000300026062  PTCL BZC ABN 6483 2025-06-11  07:51:33   
2  7002000300026062  PTCL BZC ABN 6483 2025-06-16  13:42:23   
3  7002000300026062  PTCL BZC ABN 6483 2025-06-20  14:20:22   
4  7002000300026062  PTCL BZC ABN 6483 2025-06-24  14:58:33   

                 retail_outlet        product  odometer_reading  \
0  J.S ITTEFAQ FILLING STATION  Premier Euro5               NaN   
1  J.S ITTEFAQ FILLING STATION  Premier Euro5         

In [6]:
df[df['quantity_ltrs'] == 0][['card_number', 'amount_rs', 'retail_outlet', 'txn_date']]


Unnamed: 0,card_number,amount_rs,retail_outlet,txn_date
29,7002000300109363,8490.00,MALIK AND SONS,2025-06-28
121,7002000301463868,9305.00,SARFARAZ F/S,2025-06-04
212,7002000303427093,2480.00,WAQAS F/S - 111247,2025-06-18
501,7002000308325441,3690.00,WALI FILLING STATION - 117680,2025-06-17
723,7002000322585053,8000.00,MALIK AND SONS,2025-06-26
...,...,...,...,...
17292,7002000367816041,8623.25,KHYBER FILLING STATION,2025-06-25
17394,7002000380129091,3330.00,SAKHI DARBAR F.S,2025-06-16
17471,7002000382574963,9445.00,AHMAD FILLING STATION,2025-06-26
17495,7002000382625484,3620.00,SARDAR AZAM F.S,2025-06-30


In [7]:
# Group by card number and card name, sum amount
top_cards = df.groupby(['card_number', 'card_name']).agg({
    'amount_rs': 'sum'
}).sort_values(by='amount_rs', ascending=False).head(10)

# Reset index for clean DataFrame
top_cards = top_cards.reset_index()

print(top_cards)



        card_number          card_name  amount_rs
0  7002000309647280  PTCL BZC LHR LUBE  190685.00
1  7002000304860441      PTCLBZNAHE604  129990.57
2  7002000304307542  PTCL BZC LUBE FTR  123230.00
3  7002000385418648  PTCL BZC LUBE GTR  119646.30
4  7002000301901248    PTCL BZN TA 843  109734.00
5  7002000308601825     PTCLBZCLYA1100  106423.41
6  7002000306985550  PTCL BZN RLA 8305  103108.50
7  7002000388600168   PTCL BZC SAA 252  102942.00
8  7002000341684945     PTCLBZN SAJ370  102440.00
9  7002000349104748       PTCLBZNAS878  100526.40


In [8]:
# Save to Excel
output_path = r'C:\Users\Maaz\Desktop\top_10_fuel_cards.xlsx'  # You can change this path if needed

top_cards.to_excel(output_path, index=False)

print("Excel file saved successfully at:", output_path)


Excel file saved successfully at: C:\Users\Maaz\Desktop\top_10_fuel_cards.xlsx


In [10]:
# Group by card and sum amount + quantity
top_cards = df.groupby('card_number').agg({
    'amount_rs': 'sum',
    'quantity_ltrs': 'sum'
}).sort_values(by='amount_rs', ascending=False).head(10)

# Reset index to view as DataFrame
top_cards = top_cards.reset_index()

print(top_cards)

        card_number  amount_rs  quantity_ltrs
0  7002000309647280  190685.00            0.0
1  7002000304860441  129990.57          501.0
2  7002000304307542  123230.00            0.0
3  7002000385418648  119646.30            0.0
4  7002000301901248  109734.00          390.0
5  7002000308601825  106423.41          413.0
6  7002000306985550  103108.50          360.0
7  7002000388600168  102942.00          360.0
8  7002000341684945  102440.00          400.0
9  7002000349104748  100526.40          350.0


In [12]:
# Group by card number and card name, sum quantity
top_quantity_cards = df.groupby(['card_number', 'card_name']).agg({
    'quantity_ltrs': 'sum'
}).sort_values(by='quantity_ltrs', ascending=False).head(100)

# Reset index for clean display
top_quantity_cards = top_quantity_cards.reset_index()

print(top_quantity_cards)

         card_number           card_name  quantity_ltrs
0   7002000304860441       PTCLBZNAHE604          501.0
1   7002000308601825      PTCLBZCLYA1100          413.0
2   7002000341684945      PTCLBZN SAJ370          400.0
3   7002000301901248     PTCL BZN TA 843          390.0
4   7002000306985550   PTCL BZN RLA 8305          360.0
..               ...                 ...            ...
95  7002000367014316      PTCLBZCFDJ1655          200.0
96  7002000302576783    PTCL BZC SAA 067          200.0
97  7002000321141650      PTCLBZCGAJ1917          200.0
98  7002000364029085  PTCL BZC MNJ 20 14          200.0
99  7002000325155235        PTCLBZCJC548          200.0

[100 rows x 3 columns]


In [28]:
selected_cards = [
    '7002000324701831',
    '7002000369044147',
    '7002000307971260',
    '7002000325860164',
    '7002000388810627',
    '7002000349426141',
    '7002000367315044',
    '7002000340990186',
    '7002000306812937'
]



In [29]:
df['card_number'] = df['card_number'].astype(str)


In [32]:
# Filter the DataFrame for those cards only
filtered_df = df[df['card_number'].isin(selected_cards)]

# Group and summarize amount and quantity
card_summary = filtered_df.groupby(['card_number', 'card_name']).agg({
    'amount_rs': 'sum',
    'quantity_ltrs': 'sum'
}).reset_index()

print(card_summary)


        card_number         card_name  amount_rs  quantity_ltrs
0  7002000306812937      PTCLBZCMQ928    2551.90           10.0
1  7002000307971260   PTCL BZC LA2532   41692.00          160.0
2  7002000340990186  PTCL BZC SAA 281   19599.44           76.0
3  7002000349426141    PTCLBZC SAJ363    5090.80           20.0
4  7002000367315044    PTCLBZC SAJ364    2593.40           10.0
5  7002000369044147  PTCLBZCLET072985   17892.00           70.0
6  7002000388810627  PTCL BZC SAA 987   10301.60           40.0


In [19]:
print(df['card_number'].unique())


['7002000300026062' '7002000300029371' '7002000300075739' ...
 '7002000389906226' '7002000389906861' '7002000389908388']


In [31]:
# Make sure all card numbers are clean strings (no floats, no spaces)
df['card_number'] = df['card_number'].astype(str).str.strip()


In [33]:
filtered_df = df[df['card_number'].isin(selected_cards)]

card_summary = filtered_df.groupby(['card_number', 'card_name']).agg({
    'amount_rs': 'sum',
    'quantity_ltrs': 'sum'
}).reset_index()

print(card_summary)


        card_number         card_name  amount_rs  quantity_ltrs
0  7002000306812937      PTCLBZCMQ928    2551.90           10.0
1  7002000307971260   PTCL BZC LA2532   41692.00          160.0
2  7002000340990186  PTCL BZC SAA 281   19599.44           76.0
3  7002000349426141    PTCLBZC SAJ363    5090.80           20.0
4  7002000367315044    PTCLBZC SAJ364    2593.40           10.0
5  7002000369044147  PTCLBZCLET072985   17892.00           70.0
6  7002000388810627  PTCL BZC SAA 987   10301.60           40.0


In [34]:
df['card_number'] = df['card_number'].astype(str).str.strip()


In [35]:
print(df[df['card_number'] == '7002000325860164'])
print(df[df['card_number'] == '7002000324701831'])


Empty DataFrame
Columns: [s_no, account_number, billing_id, statement_date, billing_month, card_number, card_name, txn_date, txn_time, retail_outlet, product, odometer_reading, quantity_ltrs, amount_before_discount, discount, amount_rs]
Index: []
Empty DataFrame
Columns: [s_no, account_number, billing_id, statement_date, billing_month, card_number, card_name, txn_date, txn_time, retail_outlet, product, odometer_reading, quantity_ltrs, amount_before_discount, discount, amount_rs]
Index: []
