<a href="https://colab.research.google.com/github/NickJTutt/GoogleTimeline/blob/main/Timeline_Extract.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Timeline Extract**

Complete a Google Timeline extract and obtain the "Semantic History" files.

Place the files for all the months you want to extract in a single folder.

In [None]:
# Function Definition
import json
import csv
import math
from datetime import datetime, timedelta
import os

def extract_location_data(file_path):
    """Extract de-duplicated, time-ordered place visits from one Timeline file.

    Reads the JSON file at ``file_path``, collects every ``placeVisit`` entry
    found under ``timelineObjects``, drops exact duplicates, sorts the visits
    by start timestamp, annotates each with the distance from the previous
    visit (via ``calculate_distance``) and reformats the timestamps (via
    ``adjust_timestamps``).

    Args:
        file_path: Path to a JSON file with a top-level ``timelineObjects``
            list.

    Returns:
        A list of dicts with the keys ``latitude``, ``longitude``,
        ``startTimestamp``, ``endTimestamp``, ``address`` and ``distance``.
        The list is empty when the file contains no place visits.

    Raises:
        KeyError: If ``timelineObjects`` or a required ``placeVisit`` field
            is missing.
    """
    # JSON exports are UTF-8; do not depend on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    visits = []
    for timeline_object in data['timelineObjects']:
        # Only 'placeVisit' entries carry a location and a duration.
        if 'placeVisit' not in timeline_object:
            continue
        place_visit = timeline_object['placeVisit']
        location = place_visit['location']
        duration = place_visit['duration']

        visits.append({
            # Coordinates are stored as integers scaled by 1e7.
            'latitude': location['latitudeE7'] / 1e7,
            'longitude': location['longitudeE7'] / 1e7,
            'startTimestamp': duration['startTimestamp'],
            'endTimestamp': duration['endTimestamp'],
            # 0 is kept as the placeholder for a missing address so the CSV
            # output stays the same as before.
            'address': location.get('address', 0),
        })

    # Nothing to sort or measure; also avoids indexing into an empty list.
    if not visits:
        return []

    # dict.fromkeys drops exact duplicates while preserving first-seen order,
    # so visits sharing a start timestamp keep a deterministic order.
    unique_items = dict.fromkeys(tuple(visit.items()) for visit in visits)
    ordered = sorted(
        (dict(items) for items in unique_items),
        key=lambda visit: visit['startTimestamp'],
    )

    # The first visit has no predecessor, so its distance is 0.
    ordered[0]['distance'] = 0
    for previous, current in zip(ordered, ordered[1:]):
        current['distance'] = calculate_distance(
            previous['latitude'], previous['longitude'],
            current['latitude'], current['longitude'],
        )

    return adjust_timestamps(ordered)

def write_to_csv(location_data, csv_file_path):
    """Write location records to a CSV file with a header row.

    Args:
        location_data: Iterable of dicts keyed by the column names below.
        csv_file_path: Destination path; an existing file is overwritten.
    """
    columns = ['latitude', 'longitude', 'startTimestamp', 'endTimestamp', 'address', 'distance']

    # newline='' lets the csv module control line endings itself.
    with open(csv_file_path, 'w', newline='') as out:
        dict_writer = csv.DictWriter(out, fieldnames=columns)
        dict_writer.writeheader()
        dict_writer.writerows(location_data)


def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the haversine distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The great-circle distance in km, using an Earth radius of 6371.0 km.
    """
    earth_radius_km = 6371.0

    # Work in radians throughout.
    phi1, lam1, phi2, lam2 = map(math.radians, (lat1, lon1, lat2, lon2))
    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine term followed by the central angle.
    hav = (
        math.sin(delta_phi / 2)**2
        + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lam / 2)**2
    )
    central_angle = 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))

    return earth_radius_km * central_angle

def reformat_timestamp(timestamp, utc_offset_hours=2):
    """Shift a 'Z'-suffixed ISO timestamp and format it as 'YYYY-MM-DD HH:MM'.

    Args:
        timestamp: String such as ``2023-01-01T10:00:00Z`` or
            ``2023-01-01T10:00:00.123Z`` (fractional seconds optional).
        utc_offset_hours: Hours to add to the parsed time before formatting.
            Defaults to 2, the offset this function previously hard-coded.

    Returns:
        The shifted time formatted as ``%Y-%m-%d %H:%M``.

    Raises:
        ValueError: If ``timestamp`` matches neither accepted format.
    """
    # A '.' means fractional seconds are present and need the %f directive.
    if '.' in timestamp:
        dt_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    else:
        dt_format = '%Y-%m-%dT%H:%M:%SZ'

    shifted = datetime.strptime(timestamp, dt_format) + timedelta(hours=utc_offset_hours)
    return shifted.strftime('%Y-%m-%d %H:%M')

def adjust_timestamps(location_data):
    """Reformat the start and end timestamps of every record in place.

    Each record's ``startTimestamp`` and ``endTimestamp`` values are replaced
    by the result of ``reformat_timestamp``. The same list object is returned.
    """
    for entry in location_data:
        for key in ('startTimestamp', 'endTimestamp'):
            entry[key] = reformat_timestamp(entry[key])
    return location_data

def extract_data_from_multiple_files(directory):
    """Collect location data from every ``.json`` file in ``directory``.

    Files are processed in sorted filename order so the combined result is
    deterministic; ``os.listdir`` alone returns entries in arbitrary order.
    Each file is handled independently by ``extract_location_data``.

    Args:
        directory: Path to the folder holding the JSON files. Entries whose
            names do not end in ``.json`` (case-insensitive) are ignored.

    Returns:
        A single list containing the records from all files, in file order.
    """
    all_location_data = []

    for file_name in sorted(os.listdir(directory)):
        if not file_name.lower().endswith(".json"):
            continue
        # os.path.join copes with a trailing separator on ``directory``.
        file_path = os.path.join(directory, file_name)
        all_location_data.extend(extract_location_data(file_path))

    return all_location_data

In [None]:
# Usage
# Folder holding the .JSON files from google timeline semantic
directory = '/content/drive/MyDrive/2023'
# Where the csv output should be saved
csv_file_path = '/content/location_data.csv'

# Gather the records from every file, then write them out as one CSV
all_location_data = extract_data_from_multiple_files(directory)
write_to_csv(all_location_data, csv_file_path)
print(f'Saved to {csv_file_path}')

Saved to /content/location_data.csv
