## CHICAGO DATASET

In [8]:
import time

offenseMap = {'THEFT': 0, 'BATTERY': 1, 'ASSAULT': 2, 'CRIMINAL DAMAGE': 3}
offenseSet = set()
latSet = set()
lonSet = set()
timeSet = set()
data = []

with open('/Users/martin/Desktop/STHSL/Datasets/CHI_crime/CHI_Crime.csv', 'r') as fs:
    fs.readline()  # Skip header
    for line in fs:
        arr = line.strip().split(',')
        if len(arr) < 4:
            continue  # Skip malformed lines

        try:
            timeArray = time.strptime(arr[0], '%m/%d/%Y %I:%M:%S %p')
            timestamp = time.mktime(timeArray)
            offense = offenseMap.get(arr[1])
            lat = float(arr[2])
            lon = float(arr[3])
        except Exception as e:
            continue  # Skip bad records

        if offense is None:
            continue

        latSet.add(lat)
        lonSet.add(lon)
        timeSet.add(timestamp)
        offenseSet.add(offense)

        data.append({
            'time': timestamp,
            'offense': offense,
            'lat': lat,
            'lon': lon
        })

# Stats report
print('\n--- Crime Data Statistics Report ---\n')
print('Total number of records:', len(data))
print('Unique offense types (encoded):', offenseSet)
print('Latitude range:', min(latSet), 'to', max(latSet))
print('Longitude range:', min(lonSet), 'to', max(lonSet))

lat_km = (max(latSet) - min(latSet)) * 111
lon_km = (max(lonSet) - min(lonSet)) * 84
print(f'Approx. area covered: {lat_km:.2f} km (N-S) x {lon_km:.2f} km (E-W)')

minTime = min(timeSet)
maxTime = max(timeSet)
print('Time range:')
print('  Start:', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(minTime)))
print('  End  :', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(maxTime)))



--- Crime Data Statistics Report ---

Total number of records: 321876
Unique offense types (encoded): {0, 1, 2, 3}
Latitude range: 41.644604096 to 42.022671246
Longitude range: -87.927881741 to -87.524529378
Approx. area covered: 41.97 km (N-S) x 33.88 km (E-W)
Time range:
  Start: 2016-01-01 00:00:00
  End  : 2017-12-31 23:50:00


## NEW YORK DATASET

In [10]:
print('\n--- NYC Crime Data Statistics Report ---\n')

print(f'Total number of records: {len(data)}')
print(f'Unique offense types (encoded): {offenseSet}')

minLat, maxLat = min(latSet), max(latSet)
minLon, maxLon = min(lonSet), max(lonSet)
print(f'Latitude range: {minLat:.5f} to {maxLat:.5f}')
print(f'Longitude range: {minLon:.5f} to {maxLon:.5f}')

lat_km = (maxLat - minLat) * 111
lon_km = (maxLon - minLon) * 84
print(f'Approx. area covered: {lat_km:.2f} km (N–S) × {lon_km:.2f} km (E–W)')

minTime = min(timeSet)
maxTime = max(timeSet)
print('Time range:')
print('  Start:', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(minTime)))
print('  End  :', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(maxTime)))



--- NYC Crime Data Statistics Report ---

Total number of records: 321876
Unique offense types (encoded): {0, 1, 2, 3}
Latitude range: 41.64460 to 42.02267
Longitude range: -87.92788 to -87.52453
Approx. area covered: 41.97 km (N–S) × 33.88 km (E–W)
Time range:
  Start: 2016-01-01 00:00:00
  End  : 2017-12-31 23:50:00
