In [1]:
#Algorithm to detect both load and dump

In [2]:
import dataloader
from pydantic import BaseModel
import typing
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import geopy.distance
from datetime import datetime, timedelta
from sklearn.cluster import KMeans
from tqdm import tqdm
import pandas as pd
from schemas import Machine, Position

In [3]:
day = '04-27-2023' # MM-DD-YYYY (the value is parsed with "%m-%d-%Y" further down)
machine_type = 'Truck'

In [4]:
#Class setting parameters determining when we predict load/dump

class criteria(BaseModel):
    """Tunable thresholds for the load/dump detection heuristics.

    Every field has a default, so ``criteria()`` yields the baseline
    configuration used by this notebook.
    """
    optimal_K: int = 50                     # Nb of clusters for work areas (used as KMeans n_clusters)
    meters_from_area: int = 30              # Radius from a cluster center
    seconds_for_vector: int = 10            # "Length" of vector used to determine if vehicle is reversing
    
    speed_limit: int = 30                   # Cannot be loading or dumping if speed higher than this. Don't like such a high number. (unit presumably km/h -- TODO confirm)
    meters_since_last_activity: int = 500   # Meters driven since last load/dump
    
    minutes_load: int = 3                   # Look at distance driven last x minutes before a possible load
    max_sum_last_x_minutes_load: int = 1000 # Max meters driven during the last x minutes (load)
    minutes_dump: int = 3                   # Look at distance driven last x minutes before a possible dump
    max_sum_last_x_minutes_dump: int = 1000 # Max meters driven during the last x minutes (dump)
    
    inner_prod_threshold: float = 0.80      # A threshold to pick up possible reversal

# Module-wide instance holding the default thresholds
criterias = criteria()

In [5]:
day = '04-27-2023' # MM-DD-YYYY (must match the "%m-%d-%Y" format used below)
machine_type = 'Truck'
# Load the GPS trip data for the selected day
trip = dataloader.TripsLoader(day)

# The previous day's data is loaded as well; it is used to build the load/dump clusters

# Convert the date string to a datetime object
date_obj = datetime.strptime(day, "%m-%d-%Y")
# Subtract one day using timedelta
new_date = date_obj - timedelta(days=1)
# Format the new date back to the same MM-DD-YYYY string format
day_before = new_date.strftime("%m-%d-%Y")

# Load the GPS trip data for the day before the selected day
trip_day_before = dataloader.TripsLoader(day_before)

Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  but expected ndarray.
Could not add row, row was type:  <class 'float'>  

In [6]:
def generate_load_dump_clusters():
    """Cluster the previous day's load and dump positions into work areas.

    Collects ``load_latlon`` and ``dump_latlon`` from every trip of every
    machine in ``trip_day_before`` whose ``machine_type`` equals the
    module-level ``machine_type``, then fits one KMeans model per kind.

    The number of clusters is ``criterias.optimal_K``, capped at the number
    of available points: KMeans refuses to fit more clusters than samples,
    which would otherwise make a quiet day crash the notebook.

    Returns:
        tuple: ``(load_cluster_centers, dump_cluster_centers)``, the
        ``cluster_centers_`` arrays of the two fitted models.

    Raises:
        ValueError: if there are no load or no dump positions to cluster.
    """
    load_positions = []
    dump_positions = []
    for machine in trip_day_before._machines.values():
        if machine.machine_type != machine_type:
            continue
        load_positions.extend(t.load_latlon for t in machine.trips)
        dump_positions.extend(t.dump_latlon for t in machine.trips)

    def _fit_cluster_centers(positions, kind):
        # positions is expected to be a list of (lat, lon) pairs
        coordinates = np.array(positions)
        if coordinates.size == 0:
            raise ValueError(
                f"No {kind} positions found for machine type {machine_type!r}; cannot build clusters."
            )
        # Never ask for more clusters than there are samples.
        n_clusters = min(criterias.optimal_K, len(coordinates))
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
        kmeans.fit(coordinates)
        return kmeans.cluster_centers_

    load_cluster_centers = _fit_cluster_centers(load_positions, "load")
    dump_cluster_centers = _fit_cluster_centers(dump_positions, "dump")

    return load_cluster_centers, dump_cluster_centers

In [7]:
# Fit the load and dump clusters on the previous day's data and keep the
# coordinates of the cluster centers
load_cluster_centers, dump_cluster_centers = generate_load_dump_clusters()

In [8]:
class points_times(BaseModel):
    """Positions and timestamps for one kind of event (load or dump)."""
    points: list[tuple[float, float]] = []  # (lat, lon) pairs
    times: list[datetime] = []              # Timestamps of the events

class predicted_load_dump(BaseModel):
    """Container for predicted load and dump events (points and times)."""
    load: points_times = points_times()  # Predicted load points and times
    dump: points_times = points_times()  # Predicted dump points and times

class stats(BaseModel): #Represents actual data
    """Recorded (ground-truth) data and derived per-position series for one machine."""
    all_positions: list[Position] = []  # Positions recorded during a day
    load: points_times = points_times() # Load points and times
    dump: points_times = points_times() # Dump points and times
    day_speeds: list[float] = []        # Speeds
    day_acceleration: list[float] = []  # Accelerations
    day_dists: list[float] = []         # Distances between each recording
    day_times: list[datetime] = []      # Timestamps for the per-position series above
    inner_prods: list[float] = []       # Inner product of consecutive normalized driving vectors
    # lat1_minus_lat0 = []
    # lon1_minus_lon0 = []


class automated_load_dump_for_machine():
    """Builds per-position features and Load/Dump/Driving labels for one machine.

    Typical use::

        builder = automated_load_dump_for_machine(machine)
        builder.get_data()                   # fills speeds, accelerations, times
        df = builder.get_df_with_ml_data()   # feature table with labels

    ``get_data()`` must run before ``get_df_with_ml_data()``. Both methods
    can be called repeatedly: ``get_data()`` rebuilds its series from scratch
    and ``get_df_with_ml_data()`` never modifies ``self.stats``.
    """

    def __init__(self,
                 machine_data: "Machine"):
        """Collect the machine's positions and its recorded load/dump events.

        Args:
            machine_data: the machine whose trips are analysed. Each trip is
                read for ``positions``, ``load_latlon`` and ``dump_latlon``.
        """
        self.machine = machine_data
        self.predicted = predicted_load_dump()
        self.stats = stats()

        machine_trips = self.machine.trips
        # Flatten the positions of all trips into one sequence.
        self.stats.all_positions = [pos for t in machine_trips for pos in t.positions]
        self.stats.load.points = [t.load_latlon for t in machine_trips]
        # The load time of a trip is taken to be the timestamp of its first position.
        self.stats.load.times = [t.positions[0].timestamp for t in machine_trips]
        self.stats.dump.points = [t.dump_latlon for t in machine_trips]

        # The trip info carries no dump time, so it is recovered by matching
        # the dump lat/lon against the trip's positions (first match wins).
        # A trip without any matching position contributes no dump time.
        actual_dump_times = []
        for t in machine_trips:
            for position in t.positions:
                if t.dump_latlon == (position.lat, position.lon):
                    actual_dump_times.append(position.timestamp)
                    break
        self.stats.dump.times = actual_dump_times

    def get_data(self):
        """Fill ``self.stats`` with speeds, accelerations and timestamps.

        With N positions, for every interior index ``i`` (1 .. N-2):

        * the speed (m/s) over the segment ``i-1 -> i`` is appended to
          ``day_speeds``; it is a forward difference, so entry ``k`` belongs
          to position ``k``;
        * the acceleration (m/s^2) is the change in speed between the
          segments ``i-1 -> i`` and ``i -> i+1`` divided by the duration of
          the first segment, appended to ``day_acceleration``;
        * the timestamp of position ``i`` is appended to ``day_times`` (which
          starts with the timestamp of position 0).

        If either of the two segments has zero duration (duplicated
        timestamps) both speed and acceleration are recorded as NaN.

        After the call ``day_speeds`` and ``day_acceleration`` hold N-2
        values and ``day_times`` holds N-1 values.

        Raises:
            IndexError: if the machine has no positions.
        """
        positions = self.stats.all_positions

        # Rebuild the series from scratch so that calling get_data() more
        # than once does not append a second copy of every value.
        self.stats.day_times = [positions[0].timestamp]
        self.stats.day_speeds = []
        self.stats.day_acceleration = []

        # Length (m) and duration (s) of every segment k -> k+1, computed
        # once; each segment is needed for two consecutive interior points.
        segment_meters = []
        segment_seconds = []
        for earlier, later in zip(positions, positions[1:]):
            segment_meters.append(
                geopy.distance.geodesic((later.lat, later.lon), (earlier.lat, earlier.lon)).m
            )
            segment_seconds.append(
                (later.timestamp.to_pydatetime() - earlier.timestamp.to_pydatetime()).total_seconds()
            )

        for i in range(1, len(positions) - 1):
            seconds_before = segment_seconds[i - 1]
            seconds_after = segment_seconds[i]

            if seconds_before == 0 or seconds_after == 0:
                # Duplicated timestamps: no meaningful derivative here.
                speed_ms = np.nan
                acceleration = np.nan
            else:
                speed_ms = segment_meters[i - 1] / seconds_before       # m/s
                next_speed_ms = segment_meters[i] / seconds_after       # m/s
                acceleration = (next_speed_ms - speed_ms) / seconds_before  # m/s^2

            self.stats.day_acceleration.append(acceleration)
            self.stats.day_speeds.append(speed_ms)
            self.stats.day_times.append(positions[i].timestamp)

    def get_df_with_ml_data(self):
        """Return a DataFrame with one row per position, features and labels.

        Requires ``get_data()`` to have been called. The series stored on
        ``self.stats`` are padded on local copies only, so the method can be
        called any number of times with the same result.

        Columns: ``MachineID``, ``DateTime``, ``Speed``, ``Acceleration``,
        ``Latitude``, ``Longitude``, ``Uncertainty``, ``Lat1_minus_lat0``,
        ``Lon1_minus_lon0``, ``speed_north_south``, ``speed_east_west`` and
        ``output_labels`` ("Load", "Dump" or "Driving").

        Padding of the trailing rows (forward differences are undefined at
        the end of the series):

        * ``Speed`` / ``Acceleration``: the last two rows are NaN;
        * ``DateTime``: the last row repeats the previous timestamp;
        * ``Lat1_minus_lat0`` / ``Lon1_minus_lon0``: the last row repeats the
          previous difference;
        * ``speed_north_south`` / ``speed_east_west``: the last two rows stay 0.

        Rows after the last "Dump" row are dropped.

        Raises:
            IndexError: if there are fewer than two positions, or if no row
                is labelled "Dump".
        """
        load_times_set = set(self.stats.load.times)
        dump_times_set = set(self.stats.dump.times)

        positions = self.stats.all_positions
        latitude = [p.lat for p in positions]
        longitude = [p.lon for p in positions]
        uncertainty = [p.uncertainty for p in positions]

        # Forward differences; the duplicated last value is padding only.
        lat1_minus_lat0 = [latitude[i] - latitude[i - 1] for i in range(1, len(latitude))]
        lon1_minus_lon0 = [longitude[i] - longitude[i - 1] for i in range(1, len(longitude))]
        lat1_minus_lat0.append(lat1_minus_lat0[-1])
        lon1_minus_lon0.append(lon1_minus_lon0[-1])

        # Work on copies: padding the lists held by self.stats would make a
        # second call produce columns of different lengths.
        day_times = list(self.stats.day_times)
        day_speeds = list(self.stats.day_speeds) + [np.nan, np.nan]
        day_acceleration = list(self.stats.day_acceleration) + [np.nan, np.nan]

        # Degrees per second along each axis (forward difference).
        speed_north_south = np.zeros_like(np.array(latitude))
        speed_east_west = np.zeros_like(np.array(latitude))
        for idx in range(1, len(latitude) - 1):
            try:
                total_seconds = (day_times[idx].to_pydatetime() - day_times[idx - 1].to_pydatetime()).total_seconds()
                speed_east_west[idx - 1] = (longitude[idx] - longitude[idx - 1]) / total_seconds
                speed_north_south[idx - 1] = (latitude[idx] - latitude[idx - 1]) / total_seconds
            except ZeroDivisionError:
                speed_east_west[idx - 1] = np.nan
                speed_north_south[idx - 1] = np.nan

        # day_times is one entry short of the position count; pad the last row.
        day_times.append(day_times[-1])

        # "Load" takes precedence when a timestamp is both a load and a dump time.
        output_labels = [
            "Load" if time in load_times_set
            else "Dump" if time in dump_times_set
            else "Driving"
            for time in day_times
        ]

        df = pd.DataFrame({
            "MachineID": [self.machine.machine_id] * len(day_times),
            "DateTime": day_times,
            "Speed": day_speeds,
            "Acceleration": day_acceleration,
            "Latitude": latitude,
            "Longitude": longitude,
            "Uncertainty": uncertainty,
            "Lat1_minus_lat0": lat1_minus_lat0,
            "Lon1_minus_lon0": lon1_minus_lon0,
            "speed_north_south": speed_north_south,
            "speed_east_west": speed_east_west,
            "output_labels": output_labels
        })

        # Keep everything up to and including the last dump.
        last_row = df.query('output_labels == "Dump"').index[-1]

        df = df.loc[:last_row]

        return df
     


In [9]:
import os
from sklearn.model_selection import train_test_split

# One entry per trip CSV in the data folder, with the ".csv" suffix removed.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make days[:n_days] reproducible (the sort is lexicographic on the file
# name, not chronological). Entries that are not CSV files are skipped.
days = sorted(
    csv_file.split(".csv")[0]
    for csv_file in os.listdir("data/GPSData/trips")
    if csv_file.endswith(".csv")
)
machine_type = "Truck"

def save_dfs_with_ml_data(n_days: int = 1) -> None:
    """
    Build the training and testing tables for all vehicles of the selected type.

    For each of the first ``n_days`` entries of the module-level ``days``
    list, every machine whose ``machine_type`` equals the module-level
    ``machine_type`` is turned into a feature table. Each table is split
    80/20 into train and test rows (fixed random_state), so every vehicle
    contributes 20% of its rows per day to the test data. Rows containing
    NaN are dropped from the combined tables.

    NOTE: the CSV export at the end is currently commented out, so calling
    this function builds the tables but does not write anything to disk.

    Args:
        n_days: number of days to process. Defaults to 1.
    """
    training_frames = []
    testing_frames = []
    for trip_day in days[:n_days]:
        trips_loader = dataloader.TripsLoader(trip_day)
        for temp_machine in trips_loader._machines.values():
            if temp_machine.machine_type != machine_type:
                continue

            automated_for_given_machine = automated_load_dump_for_machine(temp_machine)
            automated_for_given_machine.get_data()
            df_vehicle = automated_for_given_machine.get_df_with_ml_data()

            X, y = df_vehicle.drop(["output_labels"], axis=1), df_vehicle["output_labels"]
            # each vehicle should be represented 20% for each day in the test data
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)

            # Re-attach the labels and restore chronological order after the shuffle.
            training_frames.append(pd.concat([X_train, y_train], axis=1).sort_values(by="DateTime"))
            testing_frames.append(pd.concat([X_test, y_test], axis=1).sort_values(by="DateTime"))

    # Concatenate once at the end instead of growing a frame inside the loop;
    # pd.concat rejects an empty list, hence the fallback to an empty frame.
    df_training_all = pd.concat(training_frames, axis=0).dropna() if training_frames else pd.DataFrame()
    df_testing_all = pd.concat(testing_frames, axis=0).dropna() if testing_frames else pd.DataFrame()

    # df_training_all.to_csv(f"data/ml_model_data/training_data/train_{n_days}_days_all_trucks_multi_new_feat.csv", sep=',', index=False)
    # df_testing_all.to_csv(f"data/ml_model_data/testing_data/test_{n_days}_days_all_trucks_multi_new_feat.csv", sep=',', index=False)

    # df_training_all.to_csv(f"data/ml_model_data/training_data/train_{n_days}_days_all_trucks_merged_datetimes.csv", sep=',', index=False)
    # df_testing_all.to_csv(f"data/ml_model_data/testing_data/test_{n_days}_days_all_trucks_merged_datetimes.csv", sep=',', index=False)


# Build the train/test tables; nothing is written to disk while the to_csv
# calls inside the function are commented out.
save_dfs_with_ml_data()

In [10]:
# def merge_datapoints()->None:
#     df_training = pd.read_csv(f"data/ml_model_data/training_data/train_10_days_all_trucks_multi_new_feat.csv", sep=',')
#     df_testing = pd.read_csv(f"data/ml_model_data/testing_data/test_10_days_all_trucks_multi_new_feat.csv", sep=',')
#     # merge dfs again and include only the first two columns
#     df_all = pd.concat([df_training, df_testing], axis=0).iloc[:, :2].sort_values(by=["DateTime","MachineID"])
  
#     grouped = df_all.groupby(df_all.index // 3)
#     print(grouped["DateTime"].min())
#     print(grouped["DateTime"].max())

#     new_df = pd.DataFrame({"MachineID": grouped["MachineID"].first(), "DateTime start": grouped['DateTime'].min(), "DateTime finish": grouped['DateTime'].max()})
#     new_df.to_csv("data/ml_model_data/testingdata.csv", index=False, sep=",")
    
# merge_datapoints()


In [11]:
# class points_times(BaseModel):
#     points: list[tuple[float, float]] = []
#     times: list[datetime] = []

# class predicted_load_dump(BaseModel):
#     load: points_times = points_times()
#     dump: points_times = points_times()

# class stats(BaseModel): #Represents actual data
#     all_positions: list[Position] = []  # Positions recorded during a day
#     load: points_times = points_times() # Load points and times
#     dump: points_times = points_times() # Dump points and times
#     day_speeds: list[float] = []        # Speeds
#     day_dists: list[float] = []         # Distances between each recording
#     day_times: list[datetime] = []      # Timestamp for two above lists
#     inner_prods: list[float] = []       # Inner product of consecutive normalized driving vectors


# class automated_load_dump_for_machine():

#     def __init__(self, 
#                  machine_data: Machine): 
#                 #  load_cluster_centers: typing.Any,# Both should be list[tuple[float, float]], should implement 
#                 #  dump_cluster_centers: typing.Any) -> None:
        
#         self.machine = machine_data
#         self.predicted = predicted_load_dump()
#         self.stats = stats()

#         all_pos = [trips.positions for trips in self.machine.trips]
#         self.stats.all_positions = [item for sublist in all_pos for item in sublist]
#         self.stats.load.points = [trips.load_latlon for trips in self.machine.trips]
#         self.stats.load.times = [trips.positions[0].timestamp for trips in self.machine.trips]
#         self.stats.dump.points = [trips.dump_latlon for trips in self.machine.trips]
        
#         actual_dump_times = []
#         for t in self.machine.trips:        #Not pretty, because we don't have dump time in trip info by default
#             temp_dump_laton = t.dump_latlon # Must match latlons
#             for position in t.positions:
#                 if temp_dump_laton == (position.lat, position.lon):
#                     actual_dump_times.append(position.timestamp)
#                     break
#         self.stats.dump.times = actual_dump_times

#         # self.load_cluster_centers = load_cluster_centers
#         # self.dump_cluster_centers = dump_cluster_centers

#         #These four lines should be rewritten, to a class or something
#         self.entering_load_working_area = []
#         self.exiting_load_working_area = []
#         self.entering_dump_working_area = []
#         self.exiting_dump_working_area = []
#         self.in_dumping_area: list[int | bool] = []
#         self.in_loading_area: list[int | bool] = []
#         self.meters_from_last_act = []  
#         self.is_next_load = []          

#     def predict(self):

#         #We know first loading because that is when data begins
#         self.predicted.load.points.append((self.stats.all_positions[0].lat, self.stats.all_positions[0].lon))
#         self.predicted.load.times.append(self.stats.all_positions[0].timestamp)

#         #When true, we are predicting load, else dump. Next prediction will be dump, since we have load above
#         predicting_load = False

#         #Initialize variables that keep track of whether or not we are in a usual area for loading or dumping
#         #As given by load and dump clusters and criterias.meters_from_area
#         in_load_working_area = False #Probably true, since first position is when loading
#         in_dump_working_area = False #Maybe false, since first position is when loading

        
#         #We determine the true value of the two above variables
#         # for coord in self.load_cluster_centers:  #But verify with created clusters of load points from day before
#         #         if geopy.distance.geodesic(coord, (self.stats.all_positions[0].lat, self.stats.all_positions[0].lon)).m < criterias.meters_from_area:
#         #             in_load_working_area = True
#         #             self.in_loading_area.append(1)
#         #         else:
#         #             self.in_loading_area.append(0)

#         # for coord in self.dump_cluster_centers: #But verify with created clusters of load points from day before
#         #         if geopy.distance.geodesic(coord, (self.stats.all_positions[0].lat, self.stats.all_positions[0].lon)).m < criterias.meters_from_area:
#         #             in_dump_working_area = True
#         #             self.in_dumping_area.append(1)
#         #         else:
#         #             self.in_dumping_area.append(0)

        

#         #We keep track of how many meters we have driven since last dump or load    
#         meters_since_last_activity = 0
        
#         # add initial values where we have the load
#         self.meters_from_last_act.append(0)
#         self.is_next_load.append(0)
#         self.stats.day_times.append(self.stats.all_positions[0].timestamp)
#         # speed is added in the loop

#         #We start predicting. Are going to iterate over all positions, from first to last
#         for i in range(1,len(self.stats.all_positions)):
            
#             current_pos = self.stats.all_positions[i]
#             prev_pos = self.stats.all_positions[i-1]

#             #Seconds passed since last timestamp
#             seconds_gone = (current_pos.timestamp.to_pydatetime()-prev_pos.timestamp.to_pydatetime()).total_seconds()
#             if seconds_gone <= 0:
#                 # self.stats.day_speeds.append(self.stats.day_speeds[-1])
#                 # self.stats.inner_prods.append(self.stats.inner_prods[-1])
#                 # # self.stats.inner_prods.append(self.stats.inner_prods[-1])
#                 # self.stats.day_times.append(self.stats.day_times[-1])

#                 # just add some unique value so that we can remove these duplicate rows later
#                 self.stats.day_speeds.append(np.nan)
#                 # self.stats.inner_prods.append(np.nan)
#                 # self.stats.inner_prods.append(self.stats.inner_prods[-1])
#                 self.stats.day_times.append(self.stats.day_times[-1])
#                 self.meters_from_last_act.append(np.nan)
#             if seconds_gone > 0:

#                 #Meters driven since last timestamp
#                 meters_driven = geopy.distance.geodesic((current_pos.lat, current_pos.lon), (prev_pos.lat, prev_pos.lon)).m
                
#                 meters_since_last_activity += meters_driven
#                 # if we have either load or dump, distance from last activity is set to 0
#                 for sublist in [self.stats.load.points, self.stats.dump.points]:
#                     if (self.stats.all_positions[i].lat, self.stats.all_positions[i].lon) in sublist:#, self.stats.dump.points]:
#                         meters_since_last_activity=0
                    
                    
#                 self.meters_from_last_act.append(meters_since_last_activity/1000) # km

#                 #Meters driven since last timestamp

#                 # this is the speed at point i-1 (forward derivative)
#                 speed_kmh = (meters_driven/seconds_gone)*3.6

#                 #Add the speed to a list for entire day
#                 self.stats.day_speeds.append(speed_kmh)

#                 #Add the distance (km) between the two timestamps 
#                 # self.stats.day_dists.append(meters_driven/1000)

#                 #Add the timestamp for the two above values
#                 self.stats.day_times.append(current_pos.timestamp)

#                 #Compute vectors. This is a lot of code, maybe create some function?
#                 #Create vector for computing reverse of vehicle
#                 # vector_start = current_pos.timestamp-timedelta(seconds=criterias.seconds_for_vector)
#                 # index_start_vector = 0
#                 # for j, ts in enumerate(self.stats.day_times):
#                 #     if ts >= vector_start:
#                 #         index_start_vector = j
#                 #         break
                
#                 # current_vector = [self.stats.all_positions[i].lat-self.stats.all_positions[i-3].lat,
#                 #                     self.stats.all_positions[i].lon-self.stats.all_positions[i-3].lon]
#                 # prev_vector = [self.stats.all_positions[index_start_vector].lat-self.stats.all_positions[index_start_vector-3].lat,
#                 #                 self.stats.all_positions[index_start_vector].lon-self.stats.all_positions[index_start_vector-3].lon]

#                 # current_vector_norm = current_vector/np.linalg.norm(current_vector)
#                 # prev_vector_norm = prev_vector/np.linalg.norm(prev_vector)
#                 # inner_product = np.inner(current_vector_norm,prev_vector_norm)
#                 # self.stats.inner_prods.append(inner_product)
            
#                 #Check if we are currently in a loading or dumping working area
#                 #If yes, we update value
                
                
                
                
#                 # currently_in_load_working_area = False
#                 # for coord in self.load_cluster_centers:
#                 #     if geopy.distance.geodesic(coord, (current_pos.lat, current_pos.lon)).m < criterias.meters_from_area:
#                 #         currently_in_load_working_area = True
                
#                 # currently_in_dump_working_area = False
#                 # for coord in self.dump_cluster_centers:
#                 #     if geopy.distance.geodesic(coord, (current_pos.lat, current_pos.lon)).m < criterias.meters_from_area:
#                 #         currently_in_dump_working_area = True

#                 # #Could be interesting to mark where we enter and exit load and dump zones. Can be done with this code.
#                 # #Can be used for plotting, see notebooks for examples    
#                 # if not in_load_working_area and currently_in_load_working_area:
#                 #     self.entering_load_working_area.append(current_pos.timestamp)
#                 #     in_load_working_area = True
#                 # elif in_load_working_area and not currently_in_load_working_area:
#                 #     self.exiting_load_working_area.append(current_pos.timestamp)
#                 #     in_load_working_area = False

#                 # if not in_dump_working_area and currently_in_dump_working_area:
#                 #     self.entering_dump_working_area.append(current_pos.timestamp)
#                 #     in_dump_working_area = True
#                 # elif in_dump_working_area and not currently_in_dump_working_area:
#                 #     self.exiting_dump_working_area.append(current_pos.timestamp)
#                 #     in_dump_working_area = False

#                 # #Logic for predicting loading point
#                 # if speed_kmh < criterias.speed_limit and meters_since_last_activity>criterias.meters_since_last_activity:
#                 #     if predicting_load:
#                 #         if in_load_working_area:
#                 #             last_min_start = current_pos.timestamp-timedelta(minutes=criterias.minutes_load)
#                 #             index_start_minute = 0
#                     #         for i, ts in enumerate(self.stats.day_times):
#                     #             if ts >= last_min_start:
#                     #                 index_start_minute = i
#                     #                 break
#                     #         sum_over_last_minute = np.sum(self.stats.day_speeds[index_start_minute:])
                            
#                     #         if sum_over_last_minute < criterias.max_sum_last_x_minutes_load:
#                     #             self.predicted.load.points.append((current_pos.lat, current_pos.lon))
#                     #             self.predicted.load.times.append(current_pos.timestamp)
                                
#                     #             #Have now predicted a load, next dump
#                     #             predicting_load = False
#                     #             meters_since_last_activity = 0

#                     # else: #Predicting dump
#                     #     if in_dump_working_area:
#                     #         last_min_start = current_pos.timestamp-timedelta(minutes=criterias.minutes_dump)
#                     #         index_start_minute = 0
#                     #         for i, ts in enumerate(self.stats.day_times):
#                     #             if ts >= last_min_start:
#                     #                 index_start_minute = i
#                     #                 break
#                     #         sum_over_last_minute = np.sum(self.stats.day_speeds[index_start_minute:])
                            
#                     #         if sum_over_last_minute < criterias.max_sum_last_x_minutes_dump and self.stats.inner_prods[-1] < criterias.inner_prod_threshold: #Not ideal, want to instead pick the best among times, not just the first viable option, but difficult with live tracking.
#                     #             self.predicted.dump.points.append((current_pos.lat, current_pos.lon))
#                     #             self.predicted.dump.times.append(current_pos.timestamp)
                                
#                     #             #Have now predicted a dump, next load
#                     #             predicting_load = True
#                     #             meters_since_last_activity = 0

#     def get_df_with_ml_data(self):
#         load_times_set = set(self.stats.load.times)
#         dump_times_set = set(self.stats.dump.times)
#         positions = self.stats.all_positions
#         latitude = [sublist.lat for sublist in positions]
#         longitude = [sublist.lon for sublist in positions]
#         load = [time in load_times_set for time in self.stats.day_times]
#         dump = [time in dump_times_set for time in self.stats.day_times]
#         print('len(speed) :', len(self.stats.day_speeds))
#         print('len lon: ', len(longitude))
#         print('len lat:' , len(latitude))
#         print('Load', len(load))
#         print('dump', len(dump))
#         print('datetime', len(self.stats.day_times))
#         print('machine id', self.machine.machine_id)
#         print('meters from last act', len(self.meters_from_last_act[:-1]))
#         # Create a DataFrame with the given variables.
#         # We don't have the speed for the last datapoint, as it uses a forward derivative scheme.
#         df = pd.DataFrame({
#             "MachineID": [self.machine.machine_id]*len(self.stats.day_times),
#             "DateTime": self.stats.day_times,
#             # "Time_from_start": [(time.min - self.stats.day_times[0].min) for time in self.stats.day_times],
#             "Speed": self.stats.day_speeds,
#             # "Inner_products": self.stats.inner_prods,
#             "Latitude": latitude,
#             "Longitude": longitude,
#             "km_from_last_event": self.meters_from_last_act,
#             "Load": load,
#             "Dump": dump
#         })

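#         # Filter df to remove the rows after the last dump (True,False).
#         # NOTE(review): the query below references columns 'A' and 'B', which are not among the
#         # columns of the DataFrame built above; presumably it should filter on "Load" and "Dump" - confirm.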
#         last_row = df.query('A == True and B == False').index[-1]
#         df = df.loc[:last_row]
        
#         return df
#         # df.to_csv(f'data/ml_model_data/time_speed_{self.machine.machine_id}.csv', index=False, sep=',')

#         # # Save units to csv file
#         # units = pd.DataFrame({
#         #     "DateTime": ["Datetime"],
#         #     "Time_from_start": ["Minutes"],
#         #     "Speed": ["km/h"],
#         #     "Inner_products": ["-"],
#         #     "Load": ["-"],
#         #     "Dump": ["-"]
#         # })
#         # units.to_csv(f'data/ml_model_data/units_{self.machine.machine_id}.csv', index=False, sep=',')

    
#     def time_plot(self):

#         # Plots; not wanted when there is a lot of data
#         # Create subplots with 3 rows and 1 column
#         fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.1)

#         # Add the first line plot to the first subplot
        
#         fig.add_trace(go.Scatter(x=self.stats.day_times, y=self.stats.day_speeds, mode='lines', name='Speed'), row=1, col=1)
#         fig.add_trace(go.Scatter(x=self.stats.load.times, y=[0 for a in self.stats.load.times], mode='markers', marker=dict(symbol='cross', size=10, color='red'), name='Load actual'), row=1, col=1)
#         fig.add_trace(go.Scatter(x=self.predicted.load.times, y=[0 for p in self.predicted.load.times], mode='markers', marker=dict(symbol='star', size=10, color='red'), name='Load predicted'), row=1, col=1)
#         fig.add_trace(go.Scatter(x=self.stats.dump.times, y=[0 for a in self.stats.dump.times], mode='markers', marker=dict(symbol='cross', size=10, color='green'), name='Dump actual'), row=1, col=1)
#         fig.add_trace(go.Scatter(x=self.predicted.dump.times, y=[0 for p in self.predicted.dump.times], mode='markers', marker=dict(symbol='star', size=10, color='green'), name='Dump predicted'), row=1, col=1)

#         # Add the second line plot to the second subplot
#         fig.add_trace(go.Scatter(x=self.stats.day_times, y=np.cumsum(self.stats.day_dists), mode='lines', name='Cumulative distance'), row=2, col=1)
#         fig.add_trace(go.Scatter(x=self.stats.load.times, y=[0 for a in self.stats.load.times], mode='markers', marker=dict(symbol='cross', size=10, color='red'), name='Load actual'), row=2, col=1)
#         fig.add_trace(go.Scatter(x=self.predicted.load.times, y=[0 for p in self.predicted.load.times], mode='markers', marker=dict(symbol='star', size=10, color='red'), name='Load predicted'), row=2, col=1)
#         fig.add_trace(go.Scatter(x=self.stats.dump.times, y=[0 for a in self.stats.dump.times], mode='markers', marker=dict(symbol='cross', size=10, color='green'), name='Dump actual'), row=2, col=1)
#         fig.add_trace(go.Scatter(x=self.predicted.dump.times, y=[0 for p in self.predicted.dump.times], mode='markers', marker=dict(symbol='star', size=10, color='green'), name='Dump predicted'), row=2, col=1)

#         # Add the third line plot
#         fig.add_trace(go.Scatter(x=self.stats.day_times, y=self.stats.inner_prods, mode='lines', name='Inner product of vectors'), row=3, col=1)

#         # Update layout settings for all subplots
#         fig.update_layout(title=str('Subplots of Speeds and cumulative distance, machine_id: '+ str(self.machine.machine_id)),
#                         xaxis_title='Timestamp',
#                         showlegend=True)

#         fig.show()

#     def gantt_plot(self):

#         #Actual trips
#         all_trips_for_machine = self.machine.trips
#         start_end_each_trip_dict_actual = [dict(Start=trips.start_date, End=trips.end_date, Load=trips.load, Dist=trips.length, Id=trips.trip_id) for trips in all_trips_for_machine]
#         df_pyplot_actual = pd.DataFrame(start_end_each_trip_dict_actual)
        
        
#         #Predicted trips
#         start_end_each_trip_dict_predicted = [dict(Start=self.predicted.load.times[i], End=self.predicted.load.times[i+1]) for i in range(len(self.predicted.load.times)-1)]
#         df_pyplot_predicted = pd.DataFrame(start_end_each_trip_dict_predicted)

#         fig = px.timeline(df_pyplot_actual, x_start="Start", x_end="End", custom_data=["Load", "Dist", "Id"])
#         fig.update_traces(
#             hovertemplate="<br>".join([
#                 "Start: %{base}",
#                 "End: %{x}",
#                 "Load: %{customdata[0]}",
#                 "Distance: %{customdata[1]}",
#                 "ID: %{customdata[2]}"
#             ])
#         )
#         fig.update_yaxes(autorange="reversed") # otherwise tasks are listed from the bottom up
#         fig.show()

#         fig = px.timeline(df_pyplot_predicted, x_start="Start", x_end="End")
#         fig.update_traces(
#             hovertemplate="<br>".join([
#                 "Start: %{base}",
#                 "End: %{x}"
#             ])
#         )
#         fig.update_yaxes(autorange="reversed") # otherwise tasks are listed from the bottom up
#         fig.show()
