In [41]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd


# Run date (DD-MM-YYYY) used to build the input/ and output/ paths in the
# cells below. It is pinned to a fixed value for this run; to use today's
# date instead, swap in the commented-out line:
# date = datetime.now().strftime('%d-%m-%Y')
date = '21-05-2025'
# Echo the value so the rendered notebook records which run date was used.
date

'21-05-2025'

In [42]:
def _snake_case_columns(frame):
    """Return ``frame``'s column labels normalised to snake_case.

    Each label is lower-cased, every run of characters outside ``[a-z0-9]``
    is collapsed into a single underscore, and leading/trailing underscores
    are removed.  After that substitution no whitespace can remain, so no
    further whitespace strip is needed.
    """
    return (
        frame.columns.str.lower()
        .str.replace('[^a-z0-9]+', '_', regex=True)
        .str.strip('_')
    )


# dtype=str keeps every column as text, so the ID columns are never parsed
# as numbers.
old = pd.read_csv(f'input/{date}/fleet-device-mapping-20-05-2025.csv', dtype=str)
amenx_update = pd.read_csv(f'input/{date}/amnex-update_19-05-2025.csv', dtype=str)
chalo_update = pd.read_csv(f'input/{date}/chalo-update_19-05-2025.csv', dtype=str)

# Apply the same header normalisation to all three tables so the later cells
# can refer to columns by one consistent snake_case spelling.
for _frame in (old, amenx_update, chalo_update):
    _frame.columns = _snake_case_columns(_frame)

In [43]:
# Print the column / non-null / dtype summary of each loaded table, in the
# order they were read.
for table in (old, amenx_update, chalo_update):
    table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3386 entries, 0 to 3385
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   dep             3386 non-null   object
 1   fleet           3386 non-null   object
 2   type            3384 non-null   object
 3   regndate        3386 non-null   object
 4   obu_iemi        3114 non-null   object
 5   chalo_deviceid  2050 non-null   object
 6   date            3386 non-null   object
 7   vehicle_number  2579 non-null   object
 8   depot           2579 non-null   object
 9   ny_device_id    8 non-null      object
dtypes: object(10)
memory usage: 264.7+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87 entries, 0 to 86
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   fleet                87 non-null     object
 1   existing_amnex_iemi  76 non-null     object
 2   amnex_iemi_updated 

In [44]:
# Rows of the previous mapping that have neither an obu_iemi nor a
# chalo_deviceid value.
obu_missing = old['obu_iemi'].isna()
chalo_missing = old['chalo_deviceid'].isna()
na_df = old.loc[obu_missing & chalo_missing]
na_count = len(na_df)
print(f"Number of rows where both obu_iemi and chalo_deviceid are NA: {na_count}")
na_df


Number of rows where both obu_iemi and chalo_deviceid are NA: 6


Unnamed: 0,dep,fleet,type,regndate,obu_iemi,chalo_deviceid,date,vehicle_number,depot,ny_device_id
1148,TN,K0776,LF-G,2025-02-14,,,2025-04-24T12:11:39,,,
1248,PA,I1044,ORDI,2007-12-26,,,2025-04-24T12:11:39,,,
1315,PA,I1281,ORDI,2007-12-31,,,2025-04-24T12:11:39,,,
2837,AM,K0197,LF-K,2024-09-12,,,2025-04-24T12:11:39,,,
3188,KB,K0548,LF-G,2024-11-19,,,2025-04-17T11:24:49,,,
3366,TA,K0726,LF-K,2024-12-16,,,2025-04-24T12:11:39,,,


In [45]:
# Work on a copy so that `old` stays exactly as loaded.
old_copy = old.copy()

# Baseline before any updates: rows with both device columns empty.
missing_both = old_copy[['obu_iemi', 'chalo_deviceid']].isna().all(axis=1)
na_count = int(missing_both.sum())
print(f"Number of rows where both obu_iemi and chalo_deviceid are NA: {na_count}")

Number of rows where both obu_iemi and chalo_deviceid are NA: 6


In [46]:
# Look up the replacement Amnex IMEI for every fleet.
# - Update rows without a fleet id are dropped first: they cannot identify a
#   vehicle and must not be matched against anything.
# - validate='many_to_one' makes pandas raise a MergeError if a fleet appears
#   more than once in the update file, instead of silently adding rows.
# With how='left' this guarantees exactly one merged row per old_copy row, in
# old_copy's order, which the positional update below relies on.
amenx_merged_df = pd.merge(
    old_copy,
    amenx_update[['fleet', 'amnex_iemi_updated']].dropna(subset=['fleet']),
    on='fleet', how='left', validate='many_to_one')

# Overwrite obu_iemi where an updated IMEI exists; keep the old value
# otherwise.  Plain arrays make the update positional, so it does not depend
# on the two frames carrying identical index labels.
amnex_new_imei = amenx_merged_df['amnex_iemi_updated'].to_numpy()
old_copy['obu_iemi'] = np.where(
    pd.notna(amnex_new_imei), amnex_new_imei, old_copy['obu_iemi'].to_numpy())

In [47]:
# Inspect the Amnex merge result: a non-null amnex_iemi_updated marks a fleet
# whose obu_iemi was replaced in the previous cell.
amenx_merged_df

Unnamed: 0,dep,fleet,type,regndate,obu_iemi,chalo_deviceid,date,vehicle_number,depot,ny_device_id,amnex_iemi_updated
0,PR,J0236,ORDI,2013-06-14,862607059094994,,2025-04-17T11:24:49,TN01AN0628,Perambur Depot,,
1,AM,J0237,ORDI,2013-06-14,862607059102656,868728039343634,2025-04-17T11:24:49,TN01AN0625,Ambathur Depot,,
2,AN,J0238,ORDI,2013-06-14,862607059210541,,2025-04-17T11:24:49,TN01AN0632,Annanagar Depot,,
3,EN,J0239,ORDI,2013-06-14,862607052169082,869244044516361,2025-04-17T11:24:49,TN01AN0630,Ennore Depot,,
4,PR,J0240,ORDI,2013-06-14,862607055711781,869244044535106,2025-04-17T11:24:49,TN01AN0619,Perambur Depot,,
...,...,...,...,...,...,...,...,...,...,...,...
3381,PM,K0741,LF-K,2024-12-16,866041042272895,,2025-04-17T11:24:49,,,,
3382,PM,K0742,LF-K,2024-12-16,862607059117969,,2025-04-17T11:24:49,,,,
3383,PM,K0743,LF-K,2024-12-16,864337053466661,,2025-04-17T11:24:49,,,,
3384,PM,K0744,LF-K,2024-12-16,864513042810546,,2025-04-17T11:24:49,,,,


In [48]:
# Look up the replacement Chalo device id for every fleet.
# - Update rows without a fleet id are dropped first: they cannot identify a
#   vehicle and must not be matched against anything.
# - validate='many_to_one' makes pandas raise a MergeError if a fleet appears
#   more than once in the update file, instead of silently adding rows.
# With how='left' this guarantees exactly one merged row per old_copy row, in
# old_copy's order, which the positional update below relies on.
chalo_merged_df = pd.merge(
    old_copy,
    chalo_update[['fleet', 'chalo_iemi_updated']].dropna(subset=['fleet']),
    on='fleet', how='left', validate='many_to_one')

# Overwrite chalo_deviceid where an updated id exists; keep the old value
# otherwise.  Plain arrays make the update positional, so it does not depend
# on the two frames carrying identical index labels.
chalo_new_id = chalo_merged_df['chalo_iemi_updated'].to_numpy()
old_copy['chalo_deviceid'] = np.where(
    pd.notna(chalo_new_id), chalo_new_id, old_copy['chalo_deviceid'].to_numpy())

In [51]:
# Re-count the rows that still have neither device id after the updates.
still_missing = old_copy['obu_iemi'].isna() & old_copy['chalo_deviceid'].isna()
na_count = len(old_copy[still_missing])
print(f"Number of rows where both obu_iemi and chalo_deviceid are NA: {na_count}")

Number of rows where both obu_iemi and chalo_deviceid are NA: 1


In [50]:
# Write the refreshed mapping for this run date.  The dated output folder is
# created first because to_csv does not create missing parent directories.
output_path = Path(f'output/{date}/fleet-device-mapping-{date}.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
old_copy.to_csv(output_path, index=False)