# Google Photos Extractor
Filters through Google Photos data obtained from Google Takeout, and finds only the pictures that were taken on Yale campus.

- Go to Google Takeout
- Unzip all downloaded files, put them all in `data/`

So your file structure will look like this:
- data
  - Takeout
    - Google Photos
      - Photos from 2023
      - Photos from 2024
      - ...
  - Takeout
    - Google Photos
      - Photos from 2023
      - Photos from 2024
      - ...

In [28]:
!pip3 install geopy
import os
import json
from geopy import distance
import shutil


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [35]:
photos = []

for takeout_dir in os.listdir("data"):
    google_photos_dir_path = os.path.join("data", takeout_dir, "Google Photos")

    if not os.path.isdir(google_photos_dir_path):
        continue

    print(google_photos_dir_path)
    for year_dir in os.listdir(google_photos_dir_path):
        year_dir_path = os.path.join(google_photos_dir_path, year_dir)

        if not os.path.isdir(year_dir_path):
            continue

        print(year_dir_path)
        
        for file in os.listdir(year_dir_path):
            if file.endswith(".jpg"):
                file_path = os.path.join(year_dir_path, file)
                file_metadata_path = file_path + ".json"

                if not os.path.isfile(file_metadata_path):
                    continue

                photos.append({
                    "file": file_path,
                    "metadata": file_metadata_path
                })

print(f"Found {len(photos)} photos with metadata")

data/Takeout-03/Google Photos
data/Takeout-03/Google Photos/Photos from 2024
data/Takeout-03/Google Photos/Photos from 2023
data/Takeout-03/Google Photos/Photos from 2022
data/Takeout-03/Google Photos/Photos from 2013
data/Takeout-03/Google Photos/Photos from 2014
data/Takeout-03/Google Photos/Photos from 2016
data/Takeout-03/Google Photos/Photos from 2020
data/Takeout-03/Google Photos/Photos from 2018
data/Takeout-03/Google Photos/Photos from 2019
data/Takeout-03/Google Photos/Photos from 2021
data/Takeout-03/Google Photos/Photos from 2017
data/Takeout-04/Google Photos
data/Takeout-04/Google Photos/Photos from 2024
data/Takeout-04/Google Photos/Photos from 2023
data/Takeout-04/Google Photos/Photos from 2022
data/Takeout-04/Google Photos/Photos from 2013
data/Takeout-04/Google Photos/Photos from 2014
data/Takeout-04/Google Photos/Photos from 2020
data/Takeout-04/Google Photos/Photos from 2019
data/Takeout-04/Google Photos/Photos from 2021
data/Takeout-04/Google Photos/Photos from 2017


In [31]:
YALE_LATITUDE = 41.3166072
YALE_LONGITUDE = -72.9236719
YALE_RADIUS = 1
yale_coords = (YALE_LATITUDE, YALE_LONGITUDE)

In [36]:
photos_filtered = []

for photo in photos:
    with open(photo["metadata"], "r") as f:
        metadata = f.read()
        metadata = json.loads(metadata)

        if metadata["geoData"] is not None:
            latitude = metadata["geoData"]["latitude"]
            longitude = metadata["geoData"]["longitude"]
        elif metadata["geoDataExif"] is not None:
            latitude = metadata["geoDataExif"]["latitude"]
            longitude = metadata["geoDataExif"]["longitude"]
        else:
            continue

        photo_coords = (latitude, longitude)
        dist = distance.distance(photo_coords, yale_coords).mi
        if dist > YALE_RADIUS:
            continue
        
        photos_filtered.append(photo)

print(f"Found {len(photos_filtered)} photos within {YALE_RADIUS} miles of Yale")

output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
for photo in photos_filtered:
    shutil.copy(photo["file"], output_dir)

Found 186 photos within 1 miles of Yale
