In [2]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd


date = datetime.now().strftime('%d-%m-%Y')
# date = '26-05-2025'
date

'30-05-2025'

In [3]:
cdf = pd.read_csv(f"./input/{date}/chalo.csv", dtype=str)
fdm = pd.read_csv(f"./input/{date}/fleet-device-mapping-30-05-2025.csv", dtype=str)


# Convert column names to snake case and remove special characters
cdf.columns = cdf.columns.str.lower().str.replace(
    '[^a-zA-Z0-9]', '_', regex=True).str.replace('__+', '_', regex=True).str.strip('_').str.strip()
fdm.columns = fdm.columns.str.lower().str.replace(
    '[^a-zA-Z0-9]', '_', regex=True).str.replace('__+', '_', regex=True).str.strip('_').str.strip()

In [4]:
print(len(cdf))
print(len(fdm))
cdf.info()
fdm.info()

1216
3386
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1216 entries, 0 to 1215
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   sl_no               1216 non-null   object
 1   depot               1216 non-null   object
 2   fleet               1216 non-null   object
 3   chalo_updated_iemi  1216 non-null   object
dtypes: object(4)
memory usage: 38.1+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3386 entries, 0 to 3385
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   dep             3386 non-null   object
 1   fleet           3386 non-null   object
 2   type            3384 non-null   object
 3   regndate        3386 non-null   object
 4   obu_iemi        3125 non-null   object
 5   chalo_deviceid  2839 non-null   object
 6   date            3386 non-null   object
 7   vehicle_number  2579 non-null   object
 8   dep

In [5]:
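# Optional: blank out the existing chalo_deviceid values (the column is kept, not
# dropped) so that every fleet listed in chalo.csv is reported as a change.
# NOTE: the saved outputs below (chalo_deviceid has 0 non-null values after the
# merge) appear to have been produced with the next line enabled.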
# fdm['chalo_deviceid'] = None


In [6]:
mdf = fdm.merge(cdf, on='fleet', how='left')

In [7]:
# Inspect the merged frame: row count, columns, non-null counts and dtypes.
mdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3386 entries, 0 to 3385
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   dep                 3386 non-null   object
 1   fleet               3386 non-null   object
 2   type                3384 non-null   object
 3   regndate            3386 non-null   object
 4   obu_iemi            3125 non-null   object
 5   chalo_deviceid      0 non-null      object
 6   date                3386 non-null   object
 7   vehicle_number      2579 non-null   object
 8   depot_x             2579 non-null   object
 9   ny_device_id        8 non-null      object
 10  sl_no               1216 non-null   object
 11  depot_y             1216 non-null   object
 12  chalo_updated_iemi  1216 non-null   object
dtypes: object(13)
memory usage: 344.0+ KB


In [8]:
diff = mdf[(~mdf['chalo_updated_iemi'].isna()) & (
    mdf['chalo_deviceid'].astype(str).str.strip() != mdf['chalo_updated_iemi'].astype(str).str.strip())]
diff

Unnamed: 0,dep,fleet,type,regndate,obu_iemi,chalo_deviceid,date,vehicle_number,depot_x,ny_device_id,sl_no,depot_y,chalo_updated_iemi
0,AN,I0001,ORDI,2006-07-10,,,2025-04-24T11:57:08,,,,1,AN,869244042749626
1,AN,I0504,ORDI,2007-07-13,,,2025-04-17T11:24:49,,,,2,AN,869244044664427
2,AN,I0508,ORDI,2007-07-13,,,2025-04-24T11:57:08,,,,3,AN,869244044490302
3,AD,I0510,ORDI,2007-07-19,,,2025-04-17T11:24:49,,,,4,AD,869244044492688
4,CW,I0515,ORDI,2007-08-01,,,2025-04-17T11:24:49,,,,5,CW,869244044500415
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2926,TA,K0328,ORDB,2024-11-04,863958044840358,,2025-04-17T11:24:49,TN01AN4910,Tambaram Depot,,1201,TA,869244042781587
2934,PM,K0336,ORDB,2024-11-21,864513041440030,,2025-04-17T11:24:49,TN01AN4982,Poonamallee Depot,,1188,PM,869244044525693
2936,PM,K0338,ORDB,2024-11-14,862607059364314,,2025-04-17T11:24:49,TN01AN4878,Poonamallee Depot,,1187,PM,868728039251324
2937,PM,K0339,ORDB,2024-11-15,864513041470482,,2025-04-17T11:24:49,TN01AN4857,Poonamallee Depot,,1186,PM,869244044643900


In [9]:
diff = diff[['fleet', 'chalo_deviceid', 'chalo_updated_iemi']].rename(columns={
    'fleet': 'Fleet Number',
    'chalo_deviceid': 'Old Chalo ID',
    'chalo_updated_iemi': 'New Chalo ID'
})
diff.to_csv(f"./output/{date}/chalo-imei-update.csv", index=False)

In [10]:
fdm_update = fdm.copy()
fdm_update['chalo_deviceid'] = np.where((~mdf['chalo_updated_iemi'].isna()) & (
    mdf['chalo_deviceid'].astype(str).str.strip() != mdf['chalo_updated_iemi'].astype(str).str.strip()), mdf['chalo_updated_iemi'], fdm['chalo_deviceid'])
fdm_update.to_csv(
    f"./output/{date}/fleet-device-mapping-updated.csv", index=False)

In [11]:
# Check for duplicates in the obu_iemi file
duplicate_rows_df = fdm_update[fdm_update['obu_iemi'].notna() & fdm_update.duplicated(subset=[
    'obu_iemi'], keep=False)]
if len(duplicate_rows_df) > 0:
    print("\nFound duplicate OBU IMEI numbers in fleet-device-mapping.csv:")
    print(duplicate_rows_df)
else:
    print("\nNo duplicate OBU IMEI numbers found in fleet-device-mapping.csv")

duplicate_rows_df.to_csv(
    f"output/{date}/duplicate-obu-imei.csv", index=False)


Found duplicate OBU IMEI numbers in fleet-device-mapping.csv:
     dep  fleet  type    regndate         obu_iemi    chalo_deviceid  \
10    BB  I0709  ORDI  2007-08-17  864513041513471              None   
386   MV  I2922  ORDI  2010-03-01  862607059102813  869244044530974    
423   TD  I2968  ORDI  2010-04-05  864513041513471              None   
434   MV  I2992  ORDI  2010-04-19  862607059102813  358250330740993    
999   PM  J0484  ORDA  2015-10-09  862547054019788  869244044491375    
1741  IY  J1237  ORDR  2019-05-07  864650056121905  869244044496028    
1874  PM  J1370  ORDR  2019-07-03  862547054019788              None   
2392  IY  J1908  ORDR  2020-03-12  864650056121905  869244044496648    

                     date vehicle_number                  depot ny_device_id  
10    2025-04-17T11:24:49      TN01N4674      Basinbridge Depot          NaN  
386   2025-04-17T11:24:49      TN01N8861       Madahvaram Depot          NaN  
423   2025-04-17T11:24:49      TN01N9424     Tondia

In [12]:
# Check for duplicates in the chalo_deviceid column
duplicate_chalo_ids = fdm_update[fdm_update['chalo_deviceid'].notna(
) & fdm_update.duplicated(subset=['chalo_deviceid'], keep=False)]
if len(duplicate_chalo_ids) > 0:
    print("\nFound duplicate Chalo Device IDs:")
    print(duplicate_chalo_ids)
else:
    print("\nNo duplicate Chalo Device IDs found")

duplicate_chalo_ids.to_csv(
    f"output/{date}/duplicate-chalo-device-ids.csv", index=False)



No duplicate Chalo Device IDs found
