# AI Solutions for intelligent vehicles:

## Importing Libraries needed for code
Update this as needed while working

In [9]:
import pandas as pd
import numpy as np
import warnings

# Supress warnings keeps the console clear
warnings.filterwarnings("ignore", category=RuntimeWarning)


## Importing the dataset needed for the project:
Efficiently load and manage the PVS dataset, which contains multiple sensor readings across different test scenarios (PVS1–PVS9). The dataset includes GPS data, inertial sensor readings (accelerometer, gyroscope, magnetometer), and combined GPS & MPU data for both left and right vehicle sensors.

In [10]:
# Directory holding the CSV files of the experiment being analysed.
# Every table below is read from here, so pointing the notebook at a
# different experiment folder only requires editing this one line.
PVS_DIR = "/kaggle/input/pvs-passive-vehicular-sensors-datasets/PVS 1"


def load_pvs_csv(name):
    """Read ``dataset_<name>.csv`` from ``PVS_DIR`` into a DataFrame.

    Parameters
    ----------
    name : str
        Suffix of the file name, e.g. ``"gps"`` for ``dataset_gps.csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, using the default ``pd.read_csv`` settings.
    """
    return pd.read_csv(f"{PVS_DIR}/dataset_{name}.csv")


# Load Labels (Target Variable)
labels = load_pvs_csv("labels")

# Load GPS Data
gps_data = load_pvs_csv("gps")

# Load MPU Data (Accelerometer, Gyroscope, Magnetometer)
mpu_left = load_pvs_csv("mpu_left")
mpu_right = load_pvs_csv("mpu_right")

# Load Combined GPS + MPU Data
gps_mpu_left = load_pvs_csv("gps_mpu_left")
gps_mpu_right = load_pvs_csv("gps_mpu_right")

In [11]:
# Preview the first rows of the labels, GPS and left-side MPU tables
for preview_frame in (labels, gps_data, mpu_left):
    print(preview_frame.head())

   paved_road  unpaved_road  dirt_road  cobblestone_road  asphalt_road  \
0           1             0          0                 0             1   
1           1             0          0                 0             1   
2           1             0          0                 0             1   
3           1             0          0                 0             1   
4           1             0          0                 0             1   

   no_speed_bump  speed_bump_asphalt  speed_bump_cobblestone  good_road_left  \
0              1                   0                       0               1   
1              1                   0                       0               1   
2              1                   0                       0               1   
3              1                   0                       0               1   
4              1                   0                       0               1   

   regular_road_left  bad_road_left  good_road_right  regular_road_right  

## Preprocessing the Data:
Cleaning and preparing data for analysis and models


### <u>Missing Values:</u>
* gps data:
    * bearing = 9
    * ageofdgpsdata = 1467
    * dgpsid = 1467
    * activity = 1467
    * annotation = 1467
* mpu data left = none
* mpu data right = none
* combined gps and mpu data left = none
* combined gps and mpu data right = none

In [12]:
# Report the per-column null count of every loaded table.
# Each entry pairs the heading to print with the frame it describes.
null_report_sections = [
    ("GPS data :", gps_data),
    ("\nMPU data left:", mpu_left),
    ("\nMPU data right:", mpu_right),
    ("\nCombined GPS + MPU data left:", gps_mpu_left),
    ("\nCombined GPS + MPU data right:", gps_mpu_right),
]

for section_heading, section_frame in null_report_sections:
    print(section_heading)
    print(section_frame.isnull().sum())

GPS data :
timestamp                     0
latitude                      0
longitude                     0
elevation                     0
accuracy                      0
bearing                       9
speed_meters_per_second       0
satellites                    0
provider                      0
hdop                          0
vdop                          0
pdop                          0
geoidheight                   0
ageofdgpsdata              1467
dgpsid                     1467
activity                   1467
battery                       0
annotation                 1467
distance_meters               0
elapsed_time_seconds          0
dtype: int64

MPU data left:
timestamp                  0
acc_x_dashboard            0
acc_y_dashboard            0
acc_z_dashboard            0
acc_x_above_suspension     0
acc_y_above_suspension     0
acc_z_above_suspension     0
acc_x_below_suspension     0
acc_y_below_suspension     0
acc_z_below_suspension     0
gyro_x_dashboard           0
g

### <u>Handling missing values:</u>
Four columns of `gps_data` (`ageofdgpsdata`, `dgpsid`, `activity`, `annotation`) have 1467 missing values each, which means every value in those columns is missing. Since they contribute nothing to the dataset, they can be safely removed without causing any issues. The 9 missing `bearing` values are not handled by this step.

In [13]:
# Drop the GPS columns that are completely null.
# The result is rebound instead of using inplace=True, and errors="ignore"
# skips names that are already gone, so re-running this cell is harmless
# (the in-place version raised KeyError on a second run).
EMPTY_GPS_COLUMNS = ['ageofdgpsdata', 'dgpsid', 'activity', 'annotation']
gps_data = gps_data.drop(columns=EMPTY_GPS_COLUMNS, errors="ignore")

# Verify that the columns were dropped
print("GPS data :")
print(gps_data.isnull().sum())

GPS data :
timestamp                  0
latitude                   0
longitude                  0
elevation                  0
accuracy                   0
bearing                    9
speed_meters_per_second    0
satellites                 0
provider                   0
hdop                       0
vdop                       0
pdop                       0
geoidheight                0
battery                    0
distance_meters            0
elapsed_time_seconds       0
dtype: int64


### __Assessing Quality of Data__
Review summary statistics for each table before doing in-depth handling and analysis.

In [18]:
# Print summary statistics (DataFrame.describe) for every loaded table,
# with a dashed separator line between consecutive tables.
print("GPS DATA: \n ", gps_data.describe())
print("----------")
print("\n MPU DATA LEFT: \n ", mpu_left.describe())
print("----------")
print("\n MPU DATA RIGHT: \n ", mpu_right.describe())
print("----------")
print("\n COMBINED GPS AND MPU DATA LEFT: \n ", gps_mpu_left.describe())
print("----------")
# Heading corrected: this section describes the right-side table, but it was
# previously labelled "LEFT" as well.
print("\n COMBINED GPS AND MPU DATA RIGHT: \n ", gps_mpu_right.describe())


GPS DATA: 
            timestamp     latitude    longitude    elevation     accuracy  \
count  1.467000e+03  1467.000000  1467.000000  1467.000000  1467.000000   
mean   1.577219e+09   -27.695080   -51.119331   925.580673     4.092706   
std    4.307223e+02     0.011751     0.011376    40.691374     0.674720   
min    1.577219e+09   -27.717845   -51.132691   874.835101     4.000000   
25%    1.577219e+09   -27.702126   -51.128949   889.026553     4.000000   
50%    1.577219e+09   -27.689866   -51.124691   908.469321     4.000000   
75%    1.577220e+09   -27.687076   -51.109550   961.584807     4.000000   
max    1.577220e+09   -27.681820   -51.098840   995.974683    24.000000   

           bearing  speed_meters_per_second   satellites         hdop  \
count  1458.000000              1467.000000  1467.000000  1467.000000   
mean    213.629077                 9.286443    15.067485     0.811793   
std      95.640711                 7.820981     0.666034     0.037540   
min       1.006545 