In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
from pathlib import Path
import pyproj
from math import pi, cos, radians


In [4]:
# Path.glob yields entries in a filesystem-dependent order; sorting the paths
# makes the order in which the files are later concatenated reproducible.
info_files = sorted(Path().glob('data/GPSData/tripsInfo/*.csv'))
trip_files = sorted(Path().glob('data/GPSData/trips/*.csv'))

f"Found {len(info_files)} info files and {len(trip_files)} trip files"

'Found 73 info files and 73 trip files'

In [21]:
def area(df: pd.DataFrame):
    """Approximate the area, in square metres, of the lat/lon bounding box of ``df``.

    The box spanned by the min/max of the ``Latitude`` and ``Longitude``
    columns is mapped to planar metres (``y`` proportional to latitude, ``x``
    to longitude scaled by ``cos(latitude)``) and the polygon area is computed
    with the shoelace formula.

    Args:
        df: Frame with numeric ``Latitude`` and ``Longitude`` columns, in degrees.

    Returns:
        The non-negative area of the bounding box.
    """
    lat_min, lat_max = df["Latitude"].min(), df["Latitude"].max()
    lon_min, lon_max = df["Longitude"].min(), df["Longitude"].max()

    # Corners in ring order: (min, min) -> (max, min) -> (max, max) -> (min, max).
    latitude = [lat_min, lat_max, lat_max, lat_min]
    longitude = [lon_min, lon_min, lon_max, lon_max]

    earth_radius = 6371009  # in meters
    lat_dist = pi * earth_radius / 180.0  # metres per degree of latitude

    y = [lat * lat_dist for lat in latitude]
    x = [long * lat_dist * cos(radians(lat))
         for lat, long in zip(latitude, longitude)]

    # Shoelace formula. Starting at -1 relies on negative indexing so that the
    # neighbours of the first and last corner wrap around the ring.
    twice_area = 0.0
    for i in range(-1, len(x) - 1):
        twice_area += x[i] * (y[i + 1] - y[i - 1])
    return abs(twice_area) / 2.0

In [5]:
def concat_df(files):
    """Read every CSV in ``files`` and stack the rows into one DataFrame.

    Each file is parsed with its first line as the header. The per-file row
    labels are discarded, so the result is indexed 0..n-1 across all files.
    """
    frames = [pd.read_csv(csv_path, index_col=None, header=0) for csv_path in files]
    # ignore_index=True already renumbers the stacked rows from zero.
    return pd.concat(frames, axis=0, ignore_index=True)

In [6]:

# Load all trip-info and trip-log CSVs into one frame each.
info_df, trip_df = concat_df(info_files), concat_df(trip_files)

# Parse the raw "Timestamp" strings twice: once letting pandas choose the
# format, once with an explicit seconds-resolution format that carries a UTC
# offset. errors="coerce" turns whatever a pass cannot parse into NaT.
time_stamp_format1 = pd.to_datetime(trip_df["Timestamp"], errors="coerce")
time_stamp_format2 = pd.to_datetime(
    trip_df["Timestamp"],
    format="%Y-%m-%d %H:%M:%S%z",
    errors="coerce",
)

# Wherever the first pass failed, fall back to the result of the second pass.
mask = time_stamp_format1.isna()
time_stamp_format1.loc[mask] = time_stamp_format2.loc[mask]
trip_df["Timestamp"] = time_stamp_format1

display(info_df.head(), trip_df.head())

0           False
1           False
2           False
3           False
4           False
            ...  
10385566     True
10385567     True
10385568     True
10385569     True
10385570     True
Name: Timestamp, Length: 10385571, dtype: bool

0           True
1           True
2           True
3           True
4           True
            ... 
10385566    True
10385567    True
10385568    True
10385569    True
10385570    True
Name: Timestamp, Length: 10385571, dtype: bool

In [None]:
# Column-wise minimum and maximum of the trip log, shown as two separate Series.
# The same check for info_df is kept here but disabled:
# display(info_df.min(), info_df.max())
display(trip_df.min(), trip_df.max())

TripLogId              6225a07a03d57f0000de776a
Timestamp      2022-03-07 06:04:41.869000+00:00
Latitude                              59.546578
Longitude                             10.091785
Uncertainty                                 1.0
dtype: object

TripLogId              644ab4008fe0870000160317
Timestamp      2023-04-30 08:24:59.035000+00:00
Latitude                              60.333155
Longitude                             11.422569
Uncertainty                               198.0
dtype: object

In [None]:
# Missing values per column in both frames; len(info_df) is shown alongside so
# the info_df counts can be read against the total number of rows.
display(trip_df.isna().sum(), info_df.isna().sum(), len(info_df))

TripLogId           0
Timestamp      542464
Latitude            0
Longitude           0
Uncertainty         0
dtype: int64

TripLogId                  0
DumperMachineNumber      741
MachineType                0
LoadLongitude              0
LoadLatitude               0
DumpLongitude              0
DumpLatitude               0
MassTypeMaterial           0
Quantity                   0
DumperMachineName      31569
dtype: int64

32310

In [None]:
f"Found {len(trip_df['TripLogId'].unique())} unique trips in the trip logs and {len(info_df)} trips with info"

'Found 32150 unique trips in the trip logs and 32310 trips with info'

In [None]:
# Inspect the column dtypes of both frames.
display(info_df.dtypes, trip_df.dtypes)

TripLogId               object
DumperMachineNumber    float64
MachineType             object
LoadLongitude          float64
LoadLatitude           float64
DumpLongitude          float64
DumpLatitude           float64
MassTypeMaterial        object
Quantity               float64
DumperMachineName       object
dtype: object

TripLogId                   object
Timestamp      datetime64[ns, UTC]
Latitude                   float64
Longitude                  float64
Uncertainty                float64
dtype: object

In [None]:
# Projection to EPSG:5110 (ETRS89 / NTM zone 10 in the EPSG registry). It is
# built once at cell level: constructing a Proj object per row is very costly,
# and the old ``Proj(init="epsg:...")`` spelling is deprecated in pyproj 2+.
_EPSG_5110_PROJECTION = pyproj.Proj("EPSG:5110")


def convert_coordinates(lat, lon):
    """Project geographic coordinates to EPSG:5110 easting/northing.

    Args:
        lat: Latitude in degrees; a scalar or an array-like of values.
        lon: Longitude in degrees; same shape as ``lat``.

    Returns:
        ``(easting, northing)`` with the same shape as the inputs.
    """
    # pyproj.Proj takes its arguments in (longitude, latitude) order.
    easting, northing = _EPSG_5110_PROJECTION(lon, lat)
    return easting, northing


# Convert both columns in one vectorized call instead of a row-wise
# DataFrame.apply, which builds a Series per row and is very slow at this size.
trip_df['Easting'], trip_df['Northing'] = convert_coordinates(
    trip_df['Latitude'].to_numpy(), trip_df['Longitude'].to_numpy()
)
trip_df.head()

In [None]:
import numpy as np

# Group the trip log once so every lookup reuses the same grouping, and keep
# the set of known trip ids for membership checks.
grouped_df = trip_df.groupby("TripLogId")
unique = set(trip_df["TripLogId"].unique())


def get_trips(id: str):
    """Look up the logged rows of a single trip.

    Returns ``{'route': values}``, where ``values`` is the 2-D array of that
    trip's rows from ``trip_df`` with the ``TripLogId`` column removed, or
    ``None`` when ``id`` does not occur in ``trip_df``.
    """
    if id not in unique:
        return None
    trip_rows = grouped_df.get_group(id)
    return {'route': trip_rows.drop("TripLogId", axis=1).values}

In [None]:
# Spot-check a single trip id; the returned dict is wrapped in a Series for display.
pd.Series(get_trips('6225a07a03d57f0000de776a'))

(466, 4)


route    [[2022-03-07 06:04:41.869000+00:00, 59.9464884...
dtype: object

In [None]:
# List the distinct machine names as recorded (missing names show up as nan).
info_df["DumperMachineName"].unique()

array([nan, 'Volvo A45 (4060) 12324060', 'A45 FS (3834) 12323834',
       'Scania R580 (AJ91132)', 'Mercedes Arocs (DR67820)',
       'Scania R590 (AJ94392) AJ94392 ', 'Mercedes (SD89781) 2763',
       'Scania R580 AJ91826', 'Scania R590 AJ94391',
       'Scania R580 (PD 70495)', 'Scania R580 (AJ90818)',
       'SCANIA R 520 (PD 69848)', 'Mercedes Arocs (SD95898) 2902',
       'Volvo A45G FS (3834) 12323834', 'Cat 745 B ( 1484 ) 12321484',
       'SCANIA R490 8x4 4AKSLET 2505', 'Scania 590 (AJ94391)',
       'Scania R540 AJ94080', 'Scania R 580 (PD 69849)', 'PD 69848'],
      dtype=object)

In [None]:
# Attach to every trip-info row the 'route' array returned by get_trips for its
# TripLogId. assign() works on a copy, so info_df itself is left untouched.
combined_df = info_df.assign(
    route=lambda frame: frame.apply(
        lambda row: pd.Series(get_trips(row["TripLogId"])), axis=1
    )
)

In [None]:
machine_groups = combined_df.groupby("DumperMachineNumber")

# Peek at the first trip of the first machine. groupby() leaves NaN keys out of
# its groups by default, so missing machine numbers are dropped up front;
# otherwise get_group(nan) raises KeyError whenever NaN is the first value.
for machine in info_df['DumperMachineNumber'].dropna().unique()[:2]:
    machine_df: pd.DataFrame = machine_groups.get_group(machine)
    for index, row in machine_df.iterrows():
        print(row)
        break  # only the first row is inspected
    break  # only the first machine is inspected

TripLogId                                       6225a07a03d57f0000de776a
DumperMachineNumber                                                 20.0
MachineType                                                        Truck
LoadLongitude                                                   10.38603
LoadLatitude                                                   59.946488
DumpLongitude                                                  10.324452
DumpLatitude                                                    59.97659
MassTypeMaterial                                                   Stone
Quantity                                                            16.0
DumperMachineName                                                    NaN
route                  [[2022-03-07 06:04:41.869000+00:00, 59.9464884...
Name: 0, dtype: object


In [None]:
# Distinct dump latitudes after rounding each value to three decimals.
info_df["DumpLatitude"].map(lambda latitude: round(latitude, 3)).unique()

array([59.977, 59.94 , 59.951, 59.938, 59.976, 59.95 , 59.978, 59.947,
       59.979, 59.953, 59.939, 59.952, 59.974, 59.937, 59.946, 59.964,
       59.975, 59.948, 59.944, 59.972, 59.966, 59.973, 59.941, 59.967,
       59.969, 59.906, 59.962, 59.943, 59.942, 59.954, 59.945, 59.908,
       59.961, 59.971, 59.96 , 59.968, 59.955, 59.97 , 59.957, 59.949,
       60.083, 59.963, 59.907, 59.931, 60.032, 59.936, 59.902, 60.13 ,
       59.956, 59.958, 60.131, 59.965, 60.174, 60.188, 60.257, 60.091])

In [2]:
import numpy as np

def calculate_differences(arr):
    """Return the consecutive differences ``arr[i + 1] - arr[i]`` via ``np.diff``."""
    return np.diff(arr)


# Example usage: five values yield four consecutive differences.
arr = np.array([1, 4, 7, 2, 9])
result = calculate_differences(arr)
print(result)

[ 3  3 -5  7]


In [13]:
# Build a machine number -> machine name lookup: distinct (number, name) pairs,
# keeping only the rows where both values are present.
# NOTE(review): the recorded output of this cell is an empty frame. The isna()
# counts recorded earlier in the notebook suggest a row may never carry both a
# number and a name — confirm before relying on name_df.
name_df = info_df[["DumperMachineNumber", "DumperMachineName"]].drop_duplicates().dropna()
name_df.head(n=76)

Unnamed: 0,DumperMachineNumber,DumperMachineName
