# Imports and Data File

In [None]:
import sys
import pandas as pd
import numpy as np
from haversine import haversine
import matplotlib.pyplot as plt

sys.path.insert(0, '/workspaces/detecting-dine-out-patterns/src')
import util
from util.distance import average_distance_between_rows
from util.distance import median_distance_between_rows

In [None]:
# Load the movement records; the 'datetime' column is parsed into timestamps on read.
file_path = '../../data/movements.csv'
movements_data = pd.read_csv(file_path, parse_dates=['datetime'])

# Cut-down Data

### By Evenness

In [None]:

def is_even_second(timestamp):
    """Return True when the ``second`` field of *timestamp* is an even number.

    *timestamp* is any object exposing a ``second`` attribute, e.g. a
    ``datetime`` instance.
    """
    seconds = timestamp.second
    return seconds % 2 == 0

def analyze_measurements(df):
    """Print the row count of *df* and how many rows fall on an even second.

    The ``datetime`` column of *df* is read through the ``.dt`` accessor.
    Nothing is returned; both figures are written to stdout.
    """
    on_even_second = df['datetime'].dt.second % 2 == 0
    total_measurements = df.shape[0]
    # Summing the boolean mask counts the rows where it is True.
    even_second_measurements = int(on_even_second.sum())

    print(f"Total measurements: {total_measurements}")
    print(f"Measurements at even seconds: {even_second_measurements}")

# Print the total and even-second measurement counts for the loaded movements data.
analyze_measurements(movements_data)

In [None]:
def average_time_apart(df):
    """Print the mean gap, in seconds, between even-second measurements.

    Rows of *df* whose ``datetime`` falls on an even second are ordered
    chronologically and the mean difference between consecutive timestamps
    is written to stdout. Nothing is returned.

    The computation works on the ``datetime`` Series only, so *df* itself is
    left untouched (no helper columns are added to the caller's frame).
    """
    timestamps = df['datetime']
    even_timestamps = timestamps[timestamps.dt.second % 2 == 0].sort_values()

    # diff() leaves NaT for the first row; mean() skips it.
    average_diff = even_timestamps.diff().dt.total_seconds().mean()
    print(f"Average time apart between even second measurements: {average_diff} seconds")

# Print the mean gap between even-second measurements in the loaded movements data.
average_time_apart(movements_data)



### By Distance

In [None]:
# Summarise, for every id, the average and median distance between its rows
# as reported by the util.distance helpers.
grouped = movements_data.groupby('id')
results = [
    {
        'id': group_id,
        'avg_distance': average_distance_between_rows(rows),
        'med_distance': median_distance_between_rows(rows),
    }
    for group_id, rows in grouped
]

results_df = pd.DataFrame(results)
print(results_df)


In [None]:
# Filter the data for id I000 and put it in chronological order
i000_data = movements_data[movements_data['id'] == 'I000']
i000_data = i000_data.sort_values(by='datetime')

# Calculate the distances between consecutive points.
# haversine() is called with its default unit, which the package documents as kilometres.
coordinates = list(zip(i000_data['latitude'], i000_data['longitude']))
distances = [haversine(start, end) for start, end in zip(coordinates, coordinates[1:])]

# There is one fewer step than there are rows, so the first row (which has no
# previous point) gets NaN. Each later row holds the distance travelled to
# reach it. The slice keeps an empty selection empty instead of adding a row.
i000_data['distance'] = ([np.nan] + distances)[:len(coordinates)]

# Plot the distances
plt.figure(figsize=(10, 6))
plt.plot(i000_data['datetime'], i000_data['distance'], marker='o', linestyle='-', color='b')
plt.xlabel('Datetime')
plt.ylabel('Distance (km)')
plt.title('Distance Moved for ID I000')
plt.grid(True)
plt.show()

In [None]:

# Distance from where one id ends to where the next id (in groupby order) begins.
# NOTE(review): first()/last() follow the row order of movements_data; this cell
# does not sort by datetime — confirm the file is already chronological.
points_by_id = movements_data.groupby('id')
last_points = points_by_id.last().reset_index()
first_points = points_by_id.first().reset_index()

# Pair row i of last_points with row i + 1 of first_points.
ends = last_points.iloc[:-1]
starts = first_points.iloc[1:]
distances = [
    {
        'from_id': from_id,
        'to_id': to_id,
        'distance (km)': haversine((end_lat, end_lon), (start_lat, start_lon)),
    }
    for from_id, end_lat, end_lon, to_id, start_lat, start_lon in zip(
        ends['id'], ends['latitude'], ends['longitude'],
        starts['id'], starts['latitude'], starts['longitude'],
    )
]

distances_df = pd.DataFrame(distances)
print(distances_df)


### Every Other 

In [None]:
def average_time_apart_every_other(file_path):
    """Print the mean gap, in seconds, between consecutive rows of a CSV file.

    The file at *file_path* is read with its ``datetime`` column parsed as
    timestamps, the timestamps are put in chronological order, and the mean
    difference between neighbours is written to stdout. Nothing is returned.

    No subsampling happens here: every row of the file is used.
    """
    timestamps = pd.read_csv(file_path, parse_dates=['datetime'])['datetime']
    gaps_in_seconds = timestamps.sort_values().diff().dt.total_seconds()
    # The first gap is undefined (NaN) and is skipped by mean().
    average_diff = gaps_in_seconds.mean()
    print(f"Average time apart between every other measurement: {average_diff} seconds")

# Print the mean gap for the pre-thinned "every other" CSV.
# NOTE(review): this path is relative to '../data', whereas the main dataset is
# loaded from '../../data' earlier in the notebook — confirm which is correct.
average_time_apart_every_other('../data/every_other_movement.csv')

In [None]:
# def save_every_other_measurement(file_path, output_path):
#     df = pd.read_csv(file_path, parse_dates=['datetime'])
#     every_other_df = df.iloc[::2]
#     every_other_df.to_csv(output_path, index=False)
#     print(f"Saved every other measurement to {output_path}")

# save_every_other_measurement('../data/movements.csv', '../data/every_other_movement.csv')
