## Data Analysis...

In [6]:
import tensorflow as tf

# Avoid OOM errors by capping how much GPU memory TensorFlow may allocate.
# Each physical GPU is exposed as a single logical device with a hard memory
# limit, instead of letting TensorFlow reserve (nearly) all device memory.
# Alternative (not used here): tf.config.experimental.set_memory_growth(gpu, True)
# grows the allocation on demand rather than imposing a fixed cap.
GPU_MEMORY_LIMIT_MB = 1024  # per-GPU cap in megabytes (1 GB)

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.set_logical_device_configuration(
                gpu,
                [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as e:
        # Logical devices must be configured before the GPUs are initialized;
        # if this cell runs too late TensorFlow raises RuntimeError. Report it
        # and continue with the default device configuration.
        print(e)


2025-01-05 12:10:06.925918: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-05 12:10:10.684385: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736059211.592741   11233 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736059211.803380   11233 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-05 12:10:14.093764: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

1 Physical GPUs, 1 Logical GPUs


I0000 00:00:1736059253.695484   11233 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1024 MB memory:  -> device: 0, name: NVIDIA GeForce MX250, pci bus id: 0000:06:00.0, compute capability: 6.1


#### Python Libraries ...

In [7]:
import pandas as pd
import numpy as np

#### Load CSV ...

In [8]:
# Read the prepared dataset into `df` and preview its first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# previously saved index -- consider index_col=0 once it is confirmed that no
# later cell relies on that column.
csv_path = r'/home/malaka/Projects/CV_Projects/Crime_Data_Analysis_of_LAPD/Data_sets/Ready_dataset.csv'
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Temperature (°C),Dew point (°C),Humidity (%),Precipitation (mm),Wind Direction(degrees°),Windspeed (km/h),Air pressure (hPa),Sunshine total(min),Wind Gust (km/h),Snow depth(mm)
0,190326475,2020-03-01,2020-03-01,21:30,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,15.85,3.6,44.0,0.0,245.0,9.4,1013.7,,,
1,200106753,2020-02-09,2020-02-08,18:00,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,17.2,9.4,60.0,0.0,0.0,0.0,1017.2,,,
2,200320258,2020-11-11,2020-11-04,17:00,3,Southwest,356,1,480,BIKE - STOLEN,...,21.7,11.7,53.0,0.0,0.0,0.0,1018.7,,,
3,200907217,2023-05-10,2020-03-10,20:37,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,18.53,15.078333,80.55,0.461667,103.833333,7.6,1015.521667,,,
4,200412582,2020-09-09,2020-09-09,06:30,4,Hollenbeck,413,1,510,VEHICLE - STOLEN,...,22.2,17.8,76.0,0.0,0.0,0.0,1006.35,,,


#### Drop NaN columns ...

In this step, we remove the following columns from the dataframe:
1. 'Sunshine total(min)'
2. 'Wind Gust (km/h)'
3. 'Snow depth(mm)'

These columns are not required for our analysis, and because they contain NaN values, dropping them helps reduce the complexity of the dataset.

In [9]:
# Drop the three weather columns listed below, then show five random rows
# to confirm they are gone.
nan_weather_columns = [
    'Sunshine total(min)',
    'Wind Gust (km/h)',
    'Snow depth(mm)',
]
df = df.drop(columns=nan_weather_columns)
df.sample(n=5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Year,Month,Date/Time,Temperature (°C),Dew point (°C),Humidity (%),Precipitation (mm),Wind Direction(degrees°),Windspeed (km/h),Air pressure (hPa)
618182,220709847,2022-05-31,2022-05-25,12:00,7,Wilshire,715,1,420,THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER),...,2022,5,2022-05-25 12:00:00,15.0,12.9,87.0,0.0,337.0,4.0,1011.0
523304,222114482,2022-09-20,2022-09-20,16:25,21,Topanga,2175,1,761,BRANDISH WEAPON,...,2022,9,2022-09-20 16:25:00,24.258333,15.891667,59.833333,0.0,0.0,0.0,1013.358333
631007,220106739,2022-02-17,2022-02-17,02:05,1,Central,152,2,623,BATTERY POLICE (SIMPLE),...,2022,2,2022-02-17 02:05:00,13.0,7.458333,69.25,0.0,207.833333,6.458333,1017.083333
1004,200810910,2020-06-20,2020-06-19,18:30,8,West LA,841,1,330,BURGLARY FROM VEHICLE,...,2020,6,2020-06-19 18:30:00,23.05,13.5,55.0,0.0,130.0,3.8,1013.2
867389,240315279,2024-10-13,2024-10-09,14:00,3,Southwest,358,1,522,"VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, B...",...,2024,10,2024-10-09 14:00:00,13.9,13.9,100.0,0.0,298.0,3.6,1013.2


In [10]:
# Flag each record as a rainy day: 1 when the recorded precipitation is
# strictly above the threshold, otherwise 0.
# The comparison is vectorized over the whole column rather than applied
# row by row. Missing precipitation values compare as False and therefore
# map to 0, the same result a row-wise `1 if x > 0 else 0` gives.
RAIN_THRESHOLD_MM = 0  # precipitation (mm) above this counts as rain
df['Rainy Day'] = df['Precipitation (mm)'].gt(RAIN_THRESHOLD_MM).astype('int64')
df["Rainy Day"]


0         0
1         0
2         0
3         1
4         0
         ..
989324    0
989325    0
989326    0
989327    0
989328    0
Name: Rainy Day, Length: 989329, dtype: int64

In [11]:
# Preview five random rows of the current dataframe.
df.sample(n=5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Month,Date/Time,Temperature (°C),Dew point (°C),Humidity (%),Precipitation (mm),Wind Direction(degrees°),Windspeed (km/h),Air pressure (hPa),Rainy Day
807455,230110340,2023-04-10,2023-04-10,19:18,1,Central,132,2,624,BATTERY - SIMPLE ASSAULT,...,4,2023-04-10 19:18:00,22.91,12.45,51.7,0.0,69.0,4.44,1016.72,0
836555,231515870,2023-10-09,2023-10-06,19:45,15,N Hollywood,1585,1,510,VEHICLE - STOLEN,...,10,2023-10-06 19:45:00,32.475,12.65,30.0,0.0,233.25,5.4,1010.975,0
758201,230812432,2023-07-10,2023-07-10,16:35,8,West LA,884,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,7,2023-07-10 16:35:00,25.533333,13.633333,47.916667,0.0,0.0,0.0,1013.208333,0
130990,210804170,2021-01-06,2020-12-31,11:30,8,West LA,818,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,12,2020-12-31 11:30:00,7.5,1.1,64.0,0.0,0.0,0.0,1016.9,0
510012,220620755,2022-12-29,2022-12-29,10:00,6,Hollywood,629,1,330,BURGLARY FROM VEHICLE,...,12,2022-12-29 10:00:00,14.0,11.2,83.0,0.0,100.0,9.0,1014.0,0
