In [4]:
from pathlib import Path

import numpy as np
import pandas as pd

In [30]:
def _read_trimmed_csv(csv_path):
    """Read a CSV with every column as text and strip whitespace around the values.

    Columns are loaded with dtype=str; each column whose dtype is "object"
    is passed through .str.strip(), any other column is returned unchanged.
    """
    raw = pd.read_csv(csv_path, dtype=str)
    return raw.apply(lambda col: col.str.strip() if col.dtype == "object" else col)


# df: the fleet/device mapping file; del_df: the inactive OBU list
# (roles taken from the file names).
df = _read_trimmed_csv('input/23-05-2025/fleet-device-mapping.csv')
del_df = _read_trimmed_csv('input/23-05-2025/Inactive Obu List 22nd May.csv')

In [54]:
# Print the column / non-null / dtype summary of both loaded frames,
# mapping file first, inactive list second.
for frame in (df, del_df):
    frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3386 entries, 0 to 3385
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Dep             3386 non-null   object
 1   Fleet           3386 non-null   object
 2   Type            3384 non-null   object
 3   Regndate        3386 non-null   object
 4   Obu Iemi        3124 non-null   object
 5   Chalo DeviceID  2840 non-null   object
 6   Date            3386 non-null   object
 7   Vehicle Number  2579 non-null   object
 8   Depot           2579 non-null   object
 9   Ny Device ID    8 non-null      object
dtypes: object(10)
memory usage: 264.7+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 644 entries, 0 to 643
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Fleet Number  644 non-null    object
 1   IMEI ID       644 non-null    object
 2   LAST PACKET   577 non-null    object
dtypes: 

In [48]:
# Normalise the Fleet Number column of the inactive list so it can be compared
# with the 'Fleet' column of the mapping file.
#
# Upper-case first so a lower- or mixed-case suffix ('lf', 'Lf') is caught too,
# then drop one literal trailing 'LF'.
#
# str.rstrip('LF') is deliberately not used: its argument is a *set* of
# characters, so it would also strip a lone trailing 'L' or 'F' and any run
# such as 'FLF', silently corrupting fleet numbers that merely end in one of
# those letters.
del_df['Fleet Number'] = (
    del_df['Fleet Number']
    .str.upper()
    .str.replace(r'LF$', '', regex=True)
)



In [49]:
# Report how many rows each input file contributed, one line per file.
row_count_report = (
    f"Total number of devices in fleet-device-mapping.csv: {len(df)}",
    f"Total number of devices in Inactive Obu List 22nd May.csv: {len(del_df)}",
)
print(*row_count_report, sep="\n")


Total number of devices in fleet-device-mapping.csv: 3386
Total number of devices in Inactive Obu List 22nd May.csv: 644


In [50]:
# Left-join the inactive list onto the mapping by IMEI: every mapping row is
# kept, and rows without a matching 'IMEI ID' get NaN in the inactive-list
# columns.
merged_df = df.merge(
    del_df,
    how='left',
    left_on='Obu Iemi',
    right_on='IMEI ID',
)

In [51]:
# Keep only the mapping rows whose OBU IMEI matched an entry in the inactive
# list (after the left merge, unmatched rows have NaN in 'IMEI ID'), and only
# the columns needed to compare the two sources side by side.
m = merged_df.loc[
    merged_df['IMEI ID'].notna(),
    ['Fleet', 'Fleet Number', 'Obu Iemi', 'IMEI ID'],
]

# to_csv does not create missing parent directories, so make sure the output
# folder exists before the first write; otherwise a run on a fresh checkout
# fails here with an OSError.
Path('output/23-05-2025').mkdir(parents=True, exist_ok=True)
m.to_csv('output/23-05-2025/inactive-obu-list.csv', index=False)

In [52]:
# Look for rows in the matched list that are identical to another row across
# all columns; keep=False flags every member of a duplicate group.
dup_mask = m.duplicated(keep=False)
duplicate_rows = m[dup_mask]
if duplicate_rows.empty:
    print("No duplicates found in the inactive OBU list")
else:
    print("Found duplicate rows:")
    print(duplicate_rows)


No duplicates found in the inactive OBU list


In [53]:
# For IMEI-matched rows, compare the fleet id from the mapping file ('Fleet')
# with the one from the inactive list ('Fleet Number') and show disagreements.
fleet_differs = m['Fleet'].ne(m['Fleet Number'])
mismatched_rows = m.loc[fleet_differs]
if mismatched_rows.empty:
    print("\nNo mismatches found between Fleet and Fleet Number columns")
else:
    print("\nFound rows where Fleet and Fleet Number don't match:")
    print(mismatched_rows)



Found rows where Fleet and Fleet Number don't match:
      Fleet Fleet Number         Obu Iemi          IMEI ID
1874  J1370        J0484  862547054019788  862547054019788


In [58]:
# Drop every mapping row whose OBU IMEI appears in the inactive list.
# Rows with a missing 'Obu Iemi' are kept, since NaN is never "in" the list.
inactive_imei_mask = df['Obu Iemi'].isin(del_df['IMEI ID'])
filtered_df = df.loc[~inactive_imei_mask]

# Persist the surviving rows
filtered_df.to_csv('output/23-05-2025/filtered-fleet-device-mapping.csv', index=False)

# Summarise what was removed and what was written
print(f"\nRemoved {len(df) - len(filtered_df)} rows that had OBU IMEI numbers present in inactive OBU list")
print(f"Saved filtered data with {len(filtered_df)} rows to filtered-fleet-device-mapping.csv")



Removed 645 rows that had OBU IMEI numbers present in inactive OBU list
Saved filtered data with 2741 rows to filtered-fleet-device-mapping.csv


In [59]:
# Find rows of the filtered mapping that share a Chalo DeviceID with another
# row. Rows with a missing id are excluded so NaNs are not reported as
# duplicates of each other.
has_chalo_id = filtered_df['Chalo DeviceID'].notna()
shares_chalo_id = filtered_df.duplicated(subset=['Chalo DeviceID'], keep=False)
duplicate_chalo_ids = filtered_df[has_chalo_id & shares_chalo_id]
if duplicate_chalo_ids.empty:
    print("\nNo duplicate Chalo Device IDs found")
else:
    print("\nFound duplicate Chalo Device IDs:")
    print(duplicate_chalo_ids)


Found duplicate Chalo Device IDs:
     Dep  Fleet  Type    Regndate         Obu Iemi   Chalo DeviceID  \
134   CD  I1151  ORDI  2007-12-31              NaN  869244042737381   
208   AY  I1435  ORDI  2008-01-21              NaN  869244042818165   
310   AB  I1922  ORDI  2008-09-16              NaN  869244044507154   
586   CD  J0052   PHC  2011-11-29  867032053747452  869244042737381   
1214  AB  J0701  MINI  2016-03-01  864513042724465  869244044507154   
2046  AY  J1542  ORDR  2019-07-19  868142049699045  869244042818165   

                     Date Vehicle Number             Depot Ny Device ID  
134   2025-04-24T11:57:08            NaN               NaN          NaN  
208   2025-04-17T11:24:49            NaN               NaN          NaN  
310   2025-04-24T11:57:08            NaN               NaN          NaN  
586   2025-04-17T11:24:49      TN01N9869     Central Depot          NaN  
1214  2025-04-17T11:24:49     TN01AN1396  Adambakkam Depot          NaN  
2046  2025-04-17T11:24:

In [60]:
# Find rows of the filtered mapping that share an OBU IMEI with another row.
# Rows with a missing IMEI are excluded so NaNs are not reported as
# duplicates of each other.
has_obu_imei = filtered_df['Obu Iemi'].notna()
shares_obu_imei = filtered_df.duplicated(subset=['Obu Iemi'], keep=False)
duplicate_rows_df = filtered_df[has_obu_imei & shares_obu_imei]
if duplicate_rows_df.empty:
    print("\nNo duplicate OBU IMEI numbers found in fleet-device-mapping.csv")
else:
    print("\nFound duplicate OBU IMEI numbers in fleet-device-mapping.csv:")
    print(duplicate_rows_df)

# The CSV is written whether or not any duplicates were found.
duplicate_rows_df.to_csv('output/23-05-2025/duplicate-obu-imei.csv', index=False)


Found duplicate OBU IMEI numbers in fleet-device-mapping.csv:
     Dep  Fleet  Type    Regndate         Obu Iemi   Chalo DeviceID  \
10    BB  I0709  ORDI  2007-08-17  864513041513471  869244044499592   
386   MV  I2922  ORDI  2010-03-01  862607059102813  869244044530974   
423   TD  I2968  ORDI  2010-04-05  864513041513471  869244044518045   
434   MV  I2992  ORDI  2010-04-19  862607059102813  358250330740993   
1741  IY  J1237  ORDR  2019-05-07  864650056121905  869244044496028   
2392  IY  J1908  ORDR  2020-03-12  864650056121905  869244044496648   

                     Date Vehicle Number                  Depot Ny Device ID  
10    2025-04-17T11:24:49      TN01N4674      Basinbridge Depot          NaN  
386   2025-04-17T11:24:49      TN01N8861       Madahvaram Depot          NaN  
423   2025-04-17T11:24:49      TN01N9424     Tondiarpet-1 Depot          NaN  
434   2025-04-17T11:24:49      TN01N9509       Madahvaram Depot          NaN  
1741  2025-04-17T11:24:49     TN01AN2304  Iy

In [61]:
# Stack the OBU-IMEI duplicates on top of the Chalo-DeviceID duplicates, then
# collapse rows that are identical in every column so each appears once.
stacked_duplicates = pd.concat([duplicate_rows_df, duplicate_chalo_ids])
duplicates_df = stacked_duplicates.drop_duplicates()

# Write the combined set next to the other outputs
duplicates_df.to_csv('output/23-05-2025/all-duplicates.csv', index=False)

print(f"\nSaved {len(duplicates_df)} duplicate records to all-duplicates.csv")



Saved 12 duplicate records to all-duplicates.csv
