In [1]:
import json
import pandas as pd
import folium

# Load JSON data
with open('2023_JUNE.json', 'r') as file:
    data = json.load(file)

# Extract GPS data
gps_data = []
for item in data['timelineObjects']:
    if 'placeVisit' in item:
        location = item['placeVisit']['location']
        latitude = location['latitudeE7'] / 1e7  # Convert from E7 format
        longitude = location['longitudeE7'] / 1e7  # Convert from E7 format
        start_timestamp = item['placeVisit']['duration']['startTimestamp']
        end_timestamp = item['placeVisit']['duration']['endTimestamp']
        gps_data.append({
            'latitude': latitude,
            'longitude': longitude,
            'start_timestamp': start_timestamp,
            'end_timestamp': end_timestamp
        })

# Convert list of dicts into a DataFrame
df = pd.DataFrame(gps_data)

# Display the DataFrame
print(df)

# Save the DataFrame to a CSV file
csv_filename = 'gps_data.csv'
df.to_csv(csv_filename, index=False)

print(f"GPS data has been saved to {csv_filename}")

# Create a map centered around the average location
m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=13)
# Add points for each location in the DataFrame
for _, row in df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Start: {row['start_timestamp']}, End: {row['end_timestamp']}",
        icon=folium.Icon(color='blue')
    ).add_to(m)

# Display the map
m



      latitude  longitude           start_timestamp             end_timestamp
0    55.785574  12.521381  2023-06-01T05:54:41.697Z  2023-06-01T06:25:31.045Z
1    55.782916  12.523841  2023-06-01T06:30:30.693Z  2023-06-01T06:48:43.219Z
2    55.785574  12.521381  2023-06-01T06:52:11.613Z  2023-06-01T07:20:17.598Z
3    55.782916  12.523841  2023-06-01T07:23:01.604Z  2023-06-01T07:40:02.621Z
4    55.785574  12.521381  2023-06-01T07:43:05.123Z  2023-06-01T08:10:46.670Z
..         ...        ...                       ...                       ...
156  55.782170  12.511890  2023-06-29T19:27:23.271Z  2023-06-30T09:17:45.448Z
157  55.785574  12.521381  2023-06-30T09:32:31.701Z  2023-06-30T15:27:18.156Z
158  55.782170  12.511890  2023-06-30T15:29:23.176Z  2023-06-30T17:30:30.872Z
159  55.775221  12.577771  2023-06-30T17:44:50.837Z  2023-06-30T19:17:11.198Z
160  55.782170  12.511890  2023-06-30T19:46:37.496Z  2023-07-01T13:26:54.046Z

[161 rows x 4 columns]
GPS data has been saved to gps_data.csv


In [2]:
df.head()

Unnamed: 0,latitude,longitude,start_timestamp,end_timestamp
0,55.785574,12.521381,2023-06-01T05:54:41.697Z,2023-06-01T06:25:31.045Z
1,55.782916,12.523841,2023-06-01T06:30:30.693Z,2023-06-01T06:48:43.219Z
2,55.785574,12.521381,2023-06-01T06:52:11.613Z,2023-06-01T07:20:17.598Z
3,55.782916,12.523841,2023-06-01T07:23:01.604Z,2023-06-01T07:40:02.621Z
4,55.785574,12.521381,2023-06-01T07:43:05.123Z,2023-06-01T08:10:46.670Z


In [5]:
print(len(df))

161


In [30]:
import folium
from folium.plugins import HeatMap

# Convert list of dicts into a DataFrame
df = pd.DataFrame(gps_data)

# Create a map centered around the average location
m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=13)

# Prepare data for the HeatMap
# Each point in the HeatMap requires a [latitude, longitude] list
heat_data = [[row['latitude'], row['longitude']] for index, row in df.iterrows()]

# Create a HeatMap layer and add it to the map
HeatMap(heat_data).add_to(m)

# Display the map
m


In [25]:
from folium.plugins import MarkerCluster
# Create a MarkerCluster object
marker_cluster = MarkerCluster().add_to(m)

# Add markers to the cluster instead of the map
for _, row in df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Start: {row['start_timestamp']}, End: {row['end_timestamp']}",
        icon=folium.Icon(color='blue')
    ).add_to(marker_cluster)
m

In [11]:
import pandas as pd

# Assuming 'df' is your DataFrame and it has a 'start_timestamp' column in a recognizable datetime format

# Convert 'start_timestamp' to datetime
df['start_timestamp'] = pd.to_datetime(df['start_timestamp'])

# Get the latest date in your dataset
latest_date = df['start_timestamp'].max()

# Calculate the start date of the last week
start_last_week = latest_date - pd.Timedelta(days=7)

# Filter rows for the last week
last_week_data = df[df['start_timestamp'] > start_last_week]

# Filter rows for before the last week
before_last_week_data = df[df['start_timestamp'] <= start_last_week]

# Identify unique locations visited before the last week
before_last_week_locations = before_last_week_data[['latitude', 'longitude']].drop_duplicates()

# Identify unique locations visited in the last week
last_week_locations = last_week_data[['latitude', 'longitude']].drop_duplicates()

# Check which of the last week's places are new
new_places = last_week_locations[~last_week_locations.isin(before_last_week_locations.to_dict('records'))]

# Count the number of new places
num_new_places = len(new_places)

print(f"Number of new places visited in the last week: {num_new_places}")


Number of new places visited in the last week: 11


In [13]:
import folium

# Check if 'start_timestamp' column exists
if 'start_timestamp' in new_places.columns:
    popup_content = lambda row: f"New Place Visited: Start: {row['start_timestamp']}"
else:
    popup_content = "New Place Visited"

# Initialize a map object
m = folium.Map(location=[new_places['latitude'].mean(), new_places['longitude'].mean()], zoom_start=12)

# Add markers for each new place
for index, row in new_places.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=popup_content(row) if callable(popup_content) else popup_content,
        icon=folium.Icon(color='green')
    ).add_to(m)

# Display the map
m


## Change new place definition to 50 meters

In [14]:
pip install geopy


Defaulting to user installation because normal site-packages is not writeable
Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 2.7 MB/s eta 0:00:01
[?25hCollecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
[K     |████████████████████████████████| 40 kB 14.1 MB/s eta 0:00:01
[?25hInstalling collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.1
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [15]:
from geopy.distance import geodesic
import pandas as pd

# Assuming 'df' has 'latitude', 'longitude', and 'start_timestamp' after conversion to datetime

def is_new_place(place, existing_places, threshold=50):
    """
    Determine if a place is within 'threshold' meters of any place in 'existing_places'.
    Returns True if the place is new, False otherwise.
    """
    for _, existing_place in existing_places.iterrows():
        distance = geodesic(
            (place['latitude'], place['longitude']),
            (existing_place['latitude'], existing_place['longitude'])
        ).meters
        if distance <= threshold:
            return False
    return True

# Convert start_timestamp to datetime if not already done
df['start_timestamp'] = pd.to_datetime(df['start_timestamp'])

# Split the data into last week and before
latest_date = df['start_timestamp'].max()
start_last_week = latest_date - pd.Timedelta(days=7)
last_week_data = df[df['start_timestamp'] > start_last_week]
before_last_week_data = df[df['start_timestamp'] <= start_last_week]

# Determine new places
new_places = []
for _, place in last_week_data.iterrows():
    if is_new_place(place, before_last_week_data):
        new_places.append(place)

# Convert list of new places to DataFrame
new_places_df = pd.DataFrame(new_places)

# Count the new places
num_new_places = len(new_places_df)

print(f"Number of new places visited in the last week within 50 meters threshold: {num_new_places}")


Number of new places visited in the last week within 50 meters threshold: 5


In [17]:
new_places_df.head()

Unnamed: 0,latitude,longitude,start_timestamp,end_timestamp
133,55.625686,12.04626,2023-06-24 09:47:07.428000+00:00,2023-06-24T10:03:06.902Z
135,55.795968,12.473892,2023-06-24 16:05:39.496000+00:00,2023-06-24T16:27:56.681Z
149,55.781134,12.513048,2023-06-29 06:59:55.036000+00:00,2023-06-29T07:05:27.154Z
150,55.739317,12.486908,2023-06-29 07:41:18.660000+00:00,2023-06-29T08:14:40.694Z
151,55.738528,12.474488,2023-06-29 08:23:08.680000+00:00,2023-06-29T08:40:41.894Z


In [18]:
m = folium.Map(location=[new_places_df['latitude'].mean(), new_places_df['longitude'].mean()], zoom_start=13)

for _, row in new_places_df.iterrows():
    folium.Marker(
        location=[row['latitude'],row['longitude']],
        popup=f"Start: {row['start_timestamp']}, End: {row['end_timestamp']}",
        icon=folium.Icon(color='blue')
    ).add_to(m)
m

## Compare for pre data

In [23]:
import pandas as pd

# Assuming 'df' is your DataFrame and it has a 'start_timestamp' column in a recognizable datetime format

# Convert 'start_timestamp' to datetime
df['start_timestamp'] = pd.to_datetime(df['start_timestamp'])

# Get the latest date in your dataset
latest_date = df['start_timestamp'].max()

# Calculate the start date of the last week
end_date = latest_date - pd.Timedelta(days=7)

# Filter rows for the last week
pre_data = df[df['start_timestamp'] < end_date]

print(len(pre_data))


131


In [28]:
# Create a map centered around the average location
marker_cluster = MarkerCluster().add_to(m)
# m = folium.Map(location=[pre_data['latitude'].mean(), pre_data['longitude'].mean()], zoom_start=13)
# Add points for each location in the DataFrame
for _, row in pre_data.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Start: {row['start_timestamp']}, End: {row['end_timestamp']}",
        icon=folium.Icon(color='blue')
    ).add_to(m)

# Display the map
m

In [29]:
# from folium.plugins import MarkerCluster
# Create a MarkerCluster object
marker_cluster = MarkerCluster().add_to(m)

# Add markers to the cluster instead of the map
for _, row in pre_data.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Start: {row['start_timestamp']}, End: {row['end_timestamp']}",
        icon=folium.Icon(color='blue')
    ).add_to(marker_cluster)
    
for _, row in new_places_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Start: {row['start_timestamp']}, End: {row['end_timestamp']}",
        icon=folium.Icon(color='red')
    ).add_to(marker_cluster)
m