In [425]:
import pandas as pd
from math import radians, cos, sin, asin, sqrt
base_folder = 'heatmap_folder_ride/1_1_Start/'
heatmap_data = pd.read_csv(base_folder + 'heatmap.csv')

In [426]:
def haversine(lon1, lat1, lon2, lat2):
        lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
        dlon = lon2 - lon1 
        dlat = lat2 - lat1 
        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
        c = 2 * asin(sqrt(a)) 
        r = 6371 
        return c * r * 1000

In [427]:
def segment_overlaps_with_existing_segments(segment, segments, threshold, majority):
    segment_points_in_segments = sum(point_in_any_segment(point, segments, threshold) for point in segment.values)
    return segment_points_in_segments / len(segment) >= majority

def point_in_any_segment(point, segments, threshold):
    for segment in segments.values():
        for segment_point in segment.values:
            if haversine(point[1], point[0], segment_point[1], segment_point[0]) <= threshold:
                return True
    return False

In [428]:
def filter_segments(segments, threshold=0.7):
    total_points = sum(len(segment) for segment in segments.values())
    average_points = total_points / len(segments)
    filtered_segments = {segment_id: segment for segment_id, segment in segments.items() if len(segment) >= threshold * average_points}

    # replace the original segments with filtered segments
    return filtered_segments

In [429]:
import pandas as pd
from geopy.distance import distance as geo_distance
import os
import gpxpy
# Generate your heatmap data

# Filter the heatmap data
popular_points = heatmap_data[heatmap_data['intensity'] > 0.5]
dfs = []
# Now, for each activity, check if its GPS points fall within a "popular" area
for filename in os.listdir(base_folder):
    if filename.endswith('.gpx'):
        with open(base_folder + filename, 'r') as gpx_file:
            gpx = gpxpy.parse(gpx_file)

            # List to store all points from all segments
            all_coords = []
            all_times = []

            for track in gpx.tracks:
                for segment in track.segments:
                    points = list(segment.points)
                    coords = [(point.latitude, point.longitude) for point in points]
                    times = [point.time for point in points]

                    # Extend the list of all points with the points from the current segment
                    all_coords.extend(coords)
                    all_times.extend(times)
            print(f"Total Points in {filename}: {len(all_coords)}")
            print(f"Total Timestamp Points in {filename}: {len(all_times)}")

            count = 0
            for ((lat2, lon2), time2) in zip(all_coords, all_times):
                # Calculate distance from point to each popular point
                dists = [haversine(lon, lat, lon2, lat2) for lat, lon in popular_points[['latitude', 'longitude']].values]

                # If the minimum distance is less than or equal to 15 meters, the point is popular
                if min(dists) <= 15:
                    # Add popular point to DataFrame
                    if lat2 is not None and lon2 is not None and time2 is not None and filename is not None:
                        count += 1
                        df = pd.DataFrame({'latitude': [lat2], 'longitude': [lon2], 'timestamp': [time2], 'filename': [filename]})
                        dfs.append(df)
            print(f"Popular Points: {count}")
            #print(dfs)
            popular_points_df = pd.concat(dfs, ignore_index=True)



Total Points in 8914248054.gpx: 155
Total Timestamp Points in 8914248054.gpx: 155
Popular Points: 152
Total Points in 9203463920.gpx: 160
Total Timestamp Points in 9203463920.gpx: 160
Popular Points: 92
Total Points in 8967826254.gpx: 162
Total Timestamp Points in 8967826254.gpx: 162
Popular Points: 161
Total Points in 9215163707.gpx: 147
Total Timestamp Points in 9215163707.gpx: 147
Popular Points: 147
Total Points in 9066438551.gpx: 476
Total Timestamp Points in 9066438551.gpx: 476
Popular Points: 149
Total Points in 8955805139.gpx: 164
Total Timestamp Points in 8955805139.gpx: 164
Popular Points: 162
Total Points in 9139481497.gpx: 191
Total Timestamp Points in 9139481497.gpx: 191
Popular Points: 187
Total Points in 9083190848.gpx: 185
Total Timestamp Points in 9083190848.gpx: 185
Popular Points: 149
Total Points in 8885052801.gpx: 176
Total Timestamp Points in 8885052801.gpx: 176
Popular Points: 175
Total Points in 9126682597.gpx: 148
Total Timestamp Points in 9126682597.gpx: 148
P

In [430]:
popular_points_df

Unnamed: 0,latitude,longitude,timestamp,filename
0,33.670116,-117.829435,2023-04-18 10:40:14.810637+00:00,8914248054.gpx
1,33.670138,-117.829561,2023-04-18 10:40:17.044521+00:00,8914248054.gpx
2,33.670166,-117.829702,2023-04-18 10:40:19.559762+00:00,8914248054.gpx
3,33.670172,-117.829744,2023-04-18 10:40:20.299182+00:00,8914248054.gpx
4,33.670177,-117.829792,2023-04-18 10:40:21.138551+00:00,8914248054.gpx
...,...,...,...,...
1515,33.648327,-117.840307,2023-05-23 10:29:05.012301+00:00,9126682597.gpx
1516,33.648305,-117.840260,2023-05-23 10:29:05.888795+00:00,9126682597.gpx
1517,33.648250,-117.840180,2023-05-23 10:29:07.575368+00:00,9126682597.gpx
1518,33.648197,-117.840109,2023-05-23 10:29:09.125554+00:00,9126682597.gpx


In [431]:
import pandas as pd

segments = {}
segment_id = 0

# Iterate through the dataframe, grouping by filename
for filename, group in popular_points_df.groupby('filename'):
    # Convert the group to a list of coordinates, timestamp, and filename
    coords = group[['latitude', 'longitude', 'timestamp', 'filename']].values.tolist()
    
    # Start the first segment
    current_segment = [coords[0]]
    last_coord = coords[0]
    total_distance = 0
    
    # Iterate over the coordinates
    for coord in coords[1:]:
        # Calculate the distance to the last coordinate
        distance = haversine(last_coord[1], last_coord[0], coord[1], coord[0])
        total_distance += distance

        if total_distance < 500:
            # If the total_distance is less than 500 meters, add it to the current segment
            current_segment.append(coord)
        else:
            # Otherwise, start a new segment
            segments[segment_id] = pd.DataFrame(current_segment, columns=['latitude', 'longitude', 'timestamp', 'filename'])
            segment_id += 1
            current_segment = [last_coord, coord]
            total_distance = distance

        last_coord = coord
    
    # Add the last segment
    if current_segment:
        segments[segment_id] = pd.DataFrame(current_segment, columns=['latitude', 'longitude', 'timestamp', 'filename'])
        segment_id += 1


In [432]:
for segment_id, segment in segments.items():
    first_point = segment.iloc[0].tolist()
    last_point = segment.iloc[-1].tolist()
    segment_length = haversine(first_point[1], first_point[0], last_point[1], last_point[0])
    print(f"Segment {segment_id}: length = {segment_length} meters")

Segment 0: length = 407.84107651817226 meters
Segment 1: length = 446.95611103429195 meters
Segment 2: length = 476.48858273790313 meters
Segment 3: length = 472.8324955957957 meters
Segment 4: length = 492.1348495359107 meters
Segment 5: length = 367.15844211501934 meters
Segment 6: length = 393.4055566709259 meters
Segment 7: length = 200.44741586170503 meters
Segment 8: length = 411.75818534090973 meters
Segment 9: length = 478.89343030480904 meters
Segment 10: length = 456.39287693342544 meters
Segment 11: length = 455.4725016045627 meters
Segment 12: length = 489.0245901209653 meters
Segment 13: length = 373.33959322137224 meters
Segment 14: length = 390.9595178603972 meters
Segment 15: length = 225.5222105829802 meters
Segment 16: length = 380.5978530064316 meters
Segment 17: length = 485.8191983920249 meters
Segment 18: length = 448.8349327920532 meters
Segment 19: length = 461.2251831590408 meters
Segment 20: length = 488.8923815725978 meters
Segment 21: length = 423.4265485859

In [433]:
def check_and_merge_segments(segments, threshold=20, majority=0.6):
    # Make a copy of the segments for iteration, changes will be made to the original
    segments_copy = segments.copy()

    # Iterate over the copy
    for segment_id, segment_df in segments_copy.items():
        print("Segment ID:", segment_id)
        # Check if the segment exists in the segments (it could have been deleted in a previous iteration)
        if segment_id in segments:
            for other_segment_id, other_segment_df in segments.items():
                if segment_id != other_segment_id and other_segment_id in segments:
                    # Check for overlap and merge if a majority of points are close
                    if segment_overlaps_with_existing_segments(segment_df, {other_segment_id: other_segment_df}, threshold, majority):
                        # Merge segments
                        print(f"Shape of {segment_id}: {segment_df.shape}")
                        print(f"Shape of {other_segment_id} Before Merging: {segments[other_segment_id].shape}")
                        segments[other_segment_id] = pd.concat([segment_df, other_segment_df]).drop_duplicates().reset_index(drop=True)
                        print(f"Shape of {other_segment_id} After Merging: {segments[other_segment_id].shape}")
                        # Delete the other segment
                        del segments[segment_id]
                        # Recursively call the function again, as the segments dictionary has been updated
                        return check_and_merge_segments(segments, threshold, majority)

    # If no merges occurred, return the final segments
    return segments

# Now you can call this function with your existing segments dictionary
segments = check_and_merge_segments(segments)


Segment ID: 0
Shape of 0: (28, 4)
Shape of 8 Before Merging: (27, 4)
Shape of 8 After Merging: (55, 4)
Segment ID: 1
Shape of 1: (24, 4)
Shape of 9 Before Merging: (21, 4)
Shape of 9 After Merging: (45, 4)
Segment ID: 2
Shape of 2: (18, 4)
Shape of 18 Before Merging: (16, 4)
Shape of 18 After Merging: (34, 4)
Segment ID: 3
Shape of 3: (15, 4)
Shape of 11 Before Merging: (12, 4)
Shape of 11 After Merging: (27, 4)
Segment ID: 4
Shape of 4: (21, 4)
Shape of 12 Before Merging: (20, 4)
Shape of 12 After Merging: (41, 4)
Segment ID: 5
Shape of 5: (24, 4)
Shape of 13 Before Merging: (25, 4)
Shape of 13 After Merging: (49, 4)
Segment ID: 6
Shape of 6: (28, 4)
Shape of 14 Before Merging: (27, 4)
Shape of 14 After Merging: (55, 4)
Segment ID: 7
Shape of 7: (24, 4)
Shape of 15 Before Merging: (13, 4)
Shape of 15 After Merging: (37, 4)
Segment ID: 8
Shape of 8: (55, 4)
Shape of 16 Before Merging: (33, 4)
Shape of 16 After Merging: (88, 4)
Segment ID: 9
Shape of 9: (45, 4)
Shape of 17 Before Mergin

In [434]:
import folium

# Create a map centered at an arbitrary location
m = folium.Map(location=[popular_points_df['latitude'].mean(), popular_points_df['longitude'].mean()], zoom_start=13)

# Define a set of colors for the segments
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']

# Iterate over the segments
for segment_id, segment_data in segments.items():
    # Choose a color for this segment
    color = colors[segment_id % len(colors)]

    # Add each point in this segment to the map
    for _, row in segment_data.iterrows():
        folium.CircleMarker(
            location=(row['latitude'], row['longitude']),
            radius=3,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        ).add_to(m)

# Display the map
m

In [435]:
m.save(f"{base_folder}/segment_init.html")

In [436]:
filtered_segments = filter_segments(segments)

In [437]:
for segment,value in filtered_segments.items():
    print(f"Segment: {segment} - Length: {value.shape[0]}")

Segment: 83 - Length: 334
Segment: 84 - Length: 203
Segment: 85 - Length: 155
Segment: 86 - Length: 120
Segment: 87 - Length: 136
Segment: 88 - Length: 168
Segment: 89 - Length: 201
Segment: 90 - Length: 156


In [447]:
from collections import Counter, defaultdict
import pandas as pd
from datetime import timedelta

weekday_dict = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
result = []

for segment_id, df in filtered_segments.items():
    # convert the 'timestamp' column to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    
    # create new columns for the hour and the day of the week
    df['hour'] = df['timestamp'].dt.hour
    df['minute'] = df['timestamp'].dt.minute
    df['second'] = df['timestamp'].dt.second
    df['day'] = df['timestamp'].dt.dayofweek

    # calculate the 10th and 90th percentile times
    lower_bound_hour = int(df['hour'].quantile(0.25))
    lower_bound_minute = int(df['minute'].quantile(0.25))
    lower_bound_second = int(df['second'].quantile(0.25))
    lower_bound_time = str(timedelta(hours=lower_bound_hour, minutes=lower_bound_minute, seconds=lower_bound_second))
    
    upper_bound_hour = int(df['hour'].quantile(0.75))
    upper_bound_minute = int(df['minute'].quantile(0.75))
    upper_bound_second = int(df['second'].quantile(0.75))
    upper_bound_time = str(timedelta(hours=upper_bound_hour, minutes=upper_bound_minute, seconds=upper_bound_second))

    # Get Start and End Coordinates
    start_coord = df.iloc[0][['latitude', 'longitude']].tolist()
    end_coord = df.iloc[-1][['latitude', 'longitude']].tolist()

    # Count the number of unique activities for each day
    activities_per_day = defaultdict(set)
    for _, row in df.iterrows():
        activities_per_day[row['day']].add(row['filename'])
    
    #activities_per_day_counts = {weekday_dict[day]: len(filenames) for day, filenames in activities_per_day.items()}
    #activities_per_day_str = ', '.join([f'{day} - {count}' for day, count in activities_per_day_counts.items() if count > 3])

    activities_per_day = {weekday_dict[day]: filenames for day, filenames in activities_per_day.items()}
    sorted_day_counts = sorted([(day, len(filenames)) for day, filenames in activities_per_day.items()], key=lambda item: item[1], reverse=True)

    # Keep only the items where the count is greater than 3, unless all items have counts <= 3
    filtered_day_counts = [(day, count) for day, count in sorted_day_counts if count >= 3 or sorted_day_counts[0][1] < 3]

    # Create string representation
    activities_per_day_str = ', '.join([f'{day} - {count}' for day, count in filtered_day_counts])


    # Append to results
    result.append({
        'Segment ID': segment_id,
        'Start Coordinate': start_coord,
        'End Coordinate': end_coord,
        'Time Range': (lower_bound_time, upper_bound_time),
        'Activities Per Day': activities_per_day_str
    })

# Convert to a DataFrame
result_df = pd.DataFrame(result)


In [448]:
result_df

Unnamed: 0,Segment ID,Start Coordinate,End Coordinate,Time Range,Activities Per Day
0,83,"[33.674055, -117.835626]","[33.670088, -117.834064]","(10:33:12, 11:44:39)","Tuesday - 5, Thursday - 3"
1,84,"[33.670175, -117.833982]","[33.66626, -117.835207]","(10:37:13, 10:45:46)","Tuesday - 5, Thursday - 3"
2,85,"[33.666103, -117.835225]","[33.66223, -117.83728]","(10:39:15, 10:47:43)","Tuesday - 5, Thursday - 3"
3,86,"[33.66228, -117.837134]","[33.659032, -117.840716]","(10:41:13, 10:48:45)","Tuesday - 5, Thursday - 3"
4,87,"[33.659147, -117.840568]","[33.655942, -117.844039]","(10:42:13, 10:50:48)","Tuesday - 5, Thursday - 3"
5,88,"[33.655817, -117.844146]","[33.652751, -117.842446]","(10:42:13, 10:52:39)","Tuesday - 5, Thursday - 3"
6,89,"[33.652838, -117.84251]","[33.649145, -117.841542]","(10:44:14, 10:54:45)","Tuesday - 4, Thursday - 3"
7,90,"[33.648165, -117.840086]","[33.648203, -117.840143]","(10:47:12, 10:54:47)","Tuesday - 5, Thursday - 3"


In [449]:
import folium

# Create a map centered at an arbitrary location
m = folium.Map(location=[popular_points_df['latitude'].mean(), popular_points_df['longitude'].mean()], zoom_start=13)

# Define a set of colors for the segments
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']

# Prepare a dictionary to quickly look up segment info from the DataFrame
segment_info = result_df.set_index('Segment ID').T.to_dict()

# Iterate over the segments
for segment_id, segment_data in filtered_segments.items():
    # Choose a color for this segment
    color = colors[segment_id % len(colors)]
    
    # Get segment information for popups
    info = segment_info[segment_id]

    # Split the activities per day into separate lines
    activities_per_day_lines = info["Activities Per Day"].replace(", ", "<br>")

    # Add each point in this segment to the map
    for _, row in segment_data.iterrows():
        popup_text = f'Segment ID: {segment_id}<br>'\
                     f'Start Coordinate: {info["Start Coordinate"]}<br>'\
                     f'End Coordinate: {info["End Coordinate"]}<br>'\
                     f'Time Range (24 hr): {info["Time Range"][0]} to {info["Time Range"][1]}<br>'\
                     f'Activities Per Day of the Week (Count):<br>{activities_per_day_lines}'
                     
        folium.CircleMarker(
            location=(row['latitude'], row['longitude']),
            radius=3,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            popup=folium.Popup(popup_text, max_width=300) # Add popup
        ).add_to(m)

# Display the map
m


In [450]:
m.save(f"{base_folder}/segment_final.html")