In [1]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
from sklearn.cluster import KMeans

In [5]:
import pandas as pd

# Load the GPS data from the .log file, excluding the line (containing '}')
data_gps = pd.read_csv('./Data/3/GPS/gps_2023-05-04.log', sep=';', names=['Date', 'Time', 'Latitude', 'Longitude', 'Speed', 'Track Angle', 'Magnetic Variation'], skipfooter=1, engine='python')

# Data cleaning and formatting
data_gps['Latitude'] = data_gps['Latitude'].str.rstrip('N').astype(float)
data_gps['Longitude'] = data_gps['Longitude'].str.rstrip('E').astype(float)

# Use try-except block to handle errors while parsing date and time
try:
    data_gps['Date'] = pd.to_datetime(data_gps['Date'], format='%d/%m/%Y')
    data_gps['Time'] = pd.to_datetime(data_gps['Time'], format='%H:%M:%S').dt.time
except ValueError:
    # If an error occurs while parsing date and time, drop the row
    data_gps.dropna(inplace=True)

# Drop Magnetic Variation column as it is not relevant to the problem
data_gps.drop(columns=['Magnetic Variation'], inplace=True)

print(data_gps)


             Date      Time  Latitude  Longitude  Speed  Track Angle
1      03/05/2023  21:59:59   41.3647    2.17157   1.13       171.97
2      03/05/2023  22:01:01   41.3649    2.17184   0.05       100.92
3      03/05/2023  22:01:02   41.3649    2.17184   0.05       100.92
4      03/05/2023  22:01:03   41.3649    2.17184   1.03       179.14
5      03/05/2023  22:01:04   41.3649    2.17184   0.49       179.14
...           ...       ...       ...        ...    ...          ...
72403  04/05/2023  21:59:42   48.7848    6.13068  27.89       320.88
72404  04/05/2023  21:59:43   48.7849    6.13056  26.61       319.06
72405  04/05/2023  21:59:44   48.7850    6.13042  26.65       316.63
72406  04/05/2023  21:59:45   48.7851    6.13030  26.26       316.96
72407  04/05/2023  21:59:46   48.7852    6.13016  27.73       316.21

[72407 rows x 6 columns]


In [6]:
# Load the Shock data
data_shock3 = pd.read_csv('./Data/3/shock/shock_2023-05-04.log', sep=';', header=None)

# Add column headers
data_shock3.columns = ['Timestamp', 'AccelX', 'AccelY', 'AccelZ', 'GyroX', 'GyroY', 'GyroZ', 'Temperature']

# Convert timestamp to datetime format
data_shock3['Timestamp'] = pd.to_datetime(data_shock3['Timestamp'], unit='ns')

# Extract Date and Time from Timestamp
data_shock3['Date'] = data_shock3['Timestamp'].dt.date
data_shock3['Time'] = data_shock3['Timestamp'].dt.time

# Drop the original Timestamp column
data_shock3.drop(columns=['Timestamp'], inplace=True)

# Reorder the columns (optional)
data_shock3 = data_shock3[['Date', 'Time', 'AccelX', 'AccelY', 'AccelZ', 'GyroX', 'GyroY', 'GyroZ', 'Temperature']]

# Display the cleaned data
print(data_shock3)


# Note: From this data we can observe that shock data recor year is 1970. And GPS Data record date is 2023.

              Date             Time    AccelX    AccelY     AccelZ     GyroX  \
0       1970-01-01  00:28:03.151200 -0.004225 -0.008822   9.963903 -0.005192   
1       1970-01-01  00:28:03.151200  0.009082  0.001522   9.954070 -0.004887   
2       1970-01-01  00:28:03.151200 -0.007552  0.017937   9.945915 -0.004581   
3       1970-01-01  00:28:03.151200  0.007993 -0.002378   9.947039 -0.004887   
4       1970-01-01  00:28:03.151200  0.000886 -0.006175   9.942305 -0.004887   
...            ...              ...       ...       ...        ...       ...   
923367  1970-01-01  00:28:03.237599 -0.263614 -0.684888  10.483509 -0.011301   
923368  1970-01-01  00:28:03.237599  0.292846 -0.648380  10.558532  0.005803   
923369  1970-01-01  00:28:03.237599 -0.310100 -0.803945  10.072253 -0.024435   
923370  1970-01-01  00:28:03.237599  0.314995 -0.718664   9.313916  0.003054   
923371  1970-01-01  00:28:03.237599  0.243990 -0.744337   9.984593  0.009468   

           GyroY     GyroZ  Temperature

In [7]:
# Filter rows where the date is '2023-04-20'
data_shock3 = data_shock3[data_shock3['Date'] == '2023-04-20']

# Print Data
print(data_shock3)

Empty DataFrame
Columns: [Date, Time, AccelX, AccelY, AccelZ, GyroX, GyroY, GyroZ, Temperature]
Index: []
