In [1]:
# Run to stitch the active dispatch tables together.  
# input: ../data/active_dispatch/[time code].csv  
# output: ../data/active_dispatch.csv  

In [3]:
import datetime
import pandas as pd
import os
import re

In [4]:
folder = '../data/active_dispatch/'
output = '../data/active_dispatch.csv'

In [7]:
# step 1 - merge!

# step 1a - loop through the files, merging them

In [9]:
ad = pd.DataFrame(columns = ['incident_type_code','incident_type_name','call_received_time','location',
                             'location_description','city_name','last_updated'])

In [11]:
files = [f for f in os.listdir(folder)]

print('starting...')

# processing file X of Y: filename.csv

x = 1
y = len(files)

for file in files :
    print(f"\rprocessing file {x} of {y}: {file}                  ", end="")
    next_df = pd.read_csv(folder + str(file))
    ad = pd.concat([ad, next_df])
    x += 1

print()
print('done!')

starting...
processing file 2527 of 2527: 1744649821000.csv                  
done!


In [12]:
print(len(ad)) # how many records?

30074


In [13]:
# make sure I know which columns have nulls...
nulls_per_column = ad.isnull().sum()
print(nulls_per_column)

incident_type_code          0
incident_type_name          0
call_received_time          0
location                    0
location_description    30074
city_name                   0
last_updated                0
dtype: int64


In [14]:
# step 1b - group by to get rid of the duplicates, keeping only the latest last_updated record
# Group By [incident_type_code], [incident_type_name], etc... EVERYTHING EXCEPT [last_updated]... use the MAX([last_updated])

In [15]:
ad = ad.groupby(['incident_type_code','incident_type_name','call_received_time',
                 'location','city_name'])['last_updated'].max().reset_index()

In [16]:
ad = ad.sort_values(by = 'call_received_time').reset_index()

In [17]:
ad

Unnamed: 0,index,incident_type_code,incident_type_name,call_received_time,location,city_name,last_updated
0,618,70A,RESIDENCE-BURGLARY ALARM,1743085667000,719 MYRTLE ST,EAST,1743103621000
1,1359,71A,NON-RESIDENCE-BURGLARY ALARM,1743085918000,3188 DICKERSON PIKE,EAST,1743103621000
2,2658,83P,SHOTS FIRED,1743093178000,1433 PENNOCK AVE,EAST,1743115680000
3,2616,71P,BURGLARY-NON-RESIDENCE BREAK-IN,1743094882000,360 WALLACE RD,PARAGON MILLS,1743110280000
4,619,70A,RESIDENCE-BURGLARY ALARM,1743095110000,1236 BRENTWOOD HIGHLANDS DR,NIPPERS CORNER,1743103621000
...,...,...,...,...,...,...,...
3478,22,51P,CUTTING OR STABBING IN PROGRESS,1744646589000,631 7TH AVE S,CENTRAL,1744647480000
3479,1357,70P,BURGLARY-RESIDENCE BREAK-IN,1744646715000,1211 BELL RD,ANTIOCH,1744648020000
3480,616,64P,CORPSE/D.O.A,1744646770000,910 33RD AVE N,NORTH,1744649821000
3481,617,64P,CORPSE/D.O.A,1744648594000,508 MAIN ST,EDGEFIELD,1744648741000


### step 2 - calculate and format!

In [19]:
processed_ad = pd.DataFrame(columns = ['code',        # incident_type_code
                                        'date',       # calculate from call_received_time (UNIX time stamp)
                                        'hr',         # calculate from call_received_time
                                        'time',       # calculate from call_received_time
                                        'address',    #	calculate from location + ", NASHVILLE, TN"
                                        'ert_mins'])  # (Estimated Resolution Time: ert) calculated from last_updated

dtypes = {
    'code': 'string',
    'date': 'string',       # preserve the format
    'hr': 'string',         # preserve the format, with leading 0 if there
    'time': 'string',
    'address': 'string',
    'ert_mins': 'int'
}

processed_ad = processed_ad.astype(dtypes)

In [46]:
def calculate_date(unix_timestamp) :
    converted = str(datetime.datetime.fromtimestamp(int(unix_timestamp)/1000))
    datepart = converted[0:10]
    hr = converted[11:13]
    timepart = converted[11:19]
    return datepart, hr, timepart

print(calculate_date('1743831541000'))
print(calculate_date('1743832801000'))
print(calculate_date('1743831945000'))
print(calculate_date('1743832081000'))


('2025-04-05', '00', '00:39:01')
('2025-04-05', '01', '01:00:01')
('2025-04-05', '00', '00:45:45')
('2025-04-05', '00', '00:48:01')


In [40]:
# find the difference of the times, and divide by 60000 to get minutes instead of milliseconds.  Add 4 minutes as an estimate to each max last_updated
def calculate_ert(first_time,last_time) :
    return round((int(last_time)-int(first_time)) / 60000) + 4

print(calculate_ert('1744326418000',1744326541000))
print(calculate_ert('1743829005000',1743830281000))

6
25


In [22]:
# processing row X (index) of Y: row[call_received_time]

x = 1
y = len(ad)

for index, row in ad.iterrows() :
    print(f"\rprocessing row {x} (index {index}) of {y}: call received: {row['call_received_time']}          ", end="")

    datepart, hr, timepart = calculate_date(row['call_received_time'])
    new_row = {'code': row['incident_type_code'],
               'date': datepart,
               'hr': hr,
               'time': timepart,
               'address': row['location'] + ', NASHVILLE, TN',
               'ert_mins': calculate_ert(row['call_received_time'],row['last_updated'])
               }
    processed_ad.loc[len(processed_ad)] = new_row
    x += 1

print()
print('done!')

processing row 3483 (index 3482) of 3483: call received: 1744649187000          
done!


In [23]:
# save!
processed_ad.to_csv(output, index=False)