## Why
When exporting photos from Google Photos, you get a directory structure that looks like the one below. We want to search recursively for all metadata files that end in `.json` and move them to another directory tree with the same folder structure.

We create `fn_meta_src` and `fn_meta_dst` as two Python lists containing the absolute paths of the metadata files before and after they are moved.

In [90]:
import os
import glob
import shutil
import pandas as pd
from shutil import copyfile
# Notebook display settings: show up to 1000 rows and up to 100 characters per
# cell so that long file paths stay readable in rendered frames.
# Fully-qualified option names are used because abbreviated keys are resolved
# by pattern matching and can become ambiguous.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_colwidth', 100)

    |-- basedir
    |   |-- 2016-01-01
    |   |   |-- filename-1.jpg
    |   |   |-- filename-2.jpg.json
    |   |   |-- filename-3.jpg
    |   |   |-- filename-3.jpg.json
    |   |-- 2016-01-02


In [91]:
# Directory containing the export (extracted archive)
base_path = '/Users/lorismarini/Desktop/google-photos-elisa-takeout'

# Directory for the output (files in folders)
output_path = '/Users/lorismarini/Desktop/google-photos-elisa-to-synology'

# Search recursively for every path whose name contains a dot. glob also
# returns directories that match the pattern, so keep regular files only.
files_all = [
    name
    for name in glob.glob(f"{base_path}/**/*.*", recursive=True)
    if os.path.isfile(name)
]

# Drop the .json metadata files; everything else is kept
files_keep = [name for name in files_all if not name.endswith(".json")]

# Basename (file name without its directories) of each file to keep
basenames_keep = [os.path.basename(f) for f in files_keep]

In [92]:
# One row per kept file: its absolute source path and its bare file name
df = pd.DataFrame(
    {
        "abspath_src": files_keep,
        "basenames": basenames_keep,
    }
)

# Flag the basenames whose first two characters are both letters
df["is_alpha"] = df["basenames"].str[:2].apply(str.isalpha)

# --------------- PARSE TIME --------------------

def timestring_starts_number(n):
    """Return the part of a digit-leading filename that precedes the first "_".

    When the name contains no underscore, the whole name is returned unchanged.
    """
    prefix, _separator, _remainder = n.partition("_")
    return prefix

def timestring_starts_alpha(n):
    """Return the second field of a letter-leading filename.

    The name is split on "-" when it contains one, otherwise on "_"; the
    field right after the first separator is returned. A name containing
    neither separator yields the empty string.
    """
    # "-" takes priority over "_", so it is tried first.
    for separator in ("-", "_"):
        if separator in n:
            # The separator is present, so the split always has a second field.
            return n.split(separator)[1]
    return ""
    
# Parse for is_alpha = True: names that start with letters
alpha = df["is_alpha"].astype(bool)
df.loc[alpha, "time"] = pd.to_datetime(
    df.loc[alpha, "basenames"].apply(timestring_starts_alpha), errors="coerce"
)

# Parse for is_alpha = False: names that start with digits
where = ~alpha
df.loc[where, "time"] = pd.to_datetime(
    df.loc[where, "basenames"].apply(timestring_starts_number), errors="coerce"
)

# Folder name from datetime (date part only, as a string)
df["basedir_dest"] = df["time"].dt.date.astype(str)

# Dirname destination path
df["dirname_dest"] = df["basedir_dest"].apply(lambda x: os.path.join(output_path, x))

# Abspath destination
df["abspath_dest"] = df["dirname_dest"] + "/" + df["basenames"]

# Rows whose date could not be parsed (errors="coerce" leaves them null) are ignored
has_time = df["time"].notna()
files_to_move = int(has_time.sum())
files_ignored = int((~has_time).sum())

print(f"{files_to_move} files to move...")
print(f"{files_ignored} files ignored...")

13929 files to move...
480 files ingored...


In [88]:
# Preview the rows that have a parsed date, i.e. the files that will be moved.
# The frame is defined here so this cell also works when the notebook is run
# top to bottom on a fresh kernel.
df_tomove = df[~df["time"].isnull()]
df_tomove

Unnamed: 0,abspath_src,basenames,is_alpha,time,basedir_dest,dirname_dest,abspath_dest
0,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout_01/Google Photos/2020-04-28/IMG-2...,IMG-20200428-WA0006(1).jpg,True,2020-04-28,2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28/IMG-20200428-WA0006(1).jpg
1,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout_01/Google Photos/2020-04-28/20200...,20200428_070028.jpg,False,2020-04-28,2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28/20200428_070028.jpg
2,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout_01/Google Photos/2020-04-28/IMG-2...,IMG-20200428-WA0016(1).jpg,True,2020-04-28,2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28/IMG-20200428-WA0016(1).jpg
3,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout_01/Google Photos/2020-04-28/IMG-2...,IMG-20200428-WA0008(1).jpg,True,2020-04-28,2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28/IMG-20200428-WA0008(1).jpg
4,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout_01/Google Photos/2020-04-28/20200...,20200428_152934.jpg,False,2020-04-28,2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2020-04-28/20200428_152934.jpg
...,...,...,...,...,...,...,...
14404,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout 7/Google Photos/2017-05-18/IMG_20...,IMG_20170518_101928.jpg,True,2017-05-18,2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18/IMG_20170518_101928.jpg
14405,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout 7/Google Photos/2017-05-18/IMG_20...,IMG_20170518_180255_HDR.jpg,True,2017-05-18,2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18/IMG_20170518_180255_HDR.jpg
14406,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout 7/Google Photos/2017-05-18/IMG_20...,IMG_20170518_121625.jpg,True,2017-05-18,2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18/IMG_20170518_121625.jpg
14407,/Users/lorismarini/Desktop/google-photos-elisa-takeout/Takeout 7/Google Photos/2017-05-18/IMG_20...,IMG_20170518_101907.jpg,True,2017-05-18,2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18,/Users/lorismarini/Desktop/google-photos-elisa-to-synology/2017-05-18/IMG_20170518_101907.jpg


In [79]:
# Keep only the rows whose date was parsed successfully
df_tomove = df[~df["time"].isnull()]

# Create destination directories if they don't exist.
# exist_ok=True makes this cell safe to re-run: without it os.makedirs raises
# FileExistsError for every directory created by a previous run.
dirname_dest_unique = df_tomove["dirname_dest"].unique()
for d in dirname_dest_unique:
    os.makedirs(d, exist_ok=True)
print(f"{len(dirname_dest_unique)} directories consolidated...")

1158 directories consolidated...


In [89]:
# Move every file in df_tomove from its source path to its destination path,
# keeping a tally of what happened to each one.
moved = 0
already_moved = 0
lost = 0
conflicts = 0
for src, dst in zip(df_tomove["abspath_src"], df_tomove["abspath_dest"]):
    src_exists = os.path.exists(src)
    dst_exists = os.path.exists(dst)

    if src_exists and dst_exists:
        # Two sources map to the same destination (same basename and date).
        # shutil.move could silently replace the existing file, so skip it
        # and report the conflict instead of risking data loss.
        conflicts += 1
        print(f"NAME CONFLICT, not overwriting existing dst {dst} with src {src}!")
    elif src_exists:
        # Move file
        shutil.move(src, dst)
        moved += 1
    elif dst_exists:
        # File already moved by a previous run, skip
        already_moved += 1
    else:
        lost += 1
        print(f"DATA INTEGRITY ERROR, File not found in src {src} or dst {dst}!")

print(f"{moved} moved")
print(f"{already_moved} already moved")
print(f"{lost} lost")
print(f"{conflicts} conflicts skipped")

0 moved
13929 already moved
0 lost


In [20]:
import os
# Show the stat record (size, timestamps, mode, ...) of the first entry in file_names.
# NOTE(review): file_names is assigned by the "(1).mp4" glob cell further down,
# so on a fresh top-to-bottom run this cell raises NameError. It also raises
# IndexError when the glob matched nothing.
os.stat(file_names[0])

os.stat_result(st_mode=33261, st_ino=12888549170, st_dev=16777220, st_nlink=1, st_uid=501, st_gid=20, st_size=33495847, st_atime=1568179058, st_mtime=1567890000, st_ctime=1568178688)

In [33]:
# Takeout folder to scan. Note that this rebinds base_path for every cell run afterwards.
dir_name_photos = "loris_google_photos_takeout"
base_path = f'/Users/lorismarini/Desktop/{dir_name_photos}'

# Recursively collect every path under base_path whose name ends in "(1).mp4"
file_names = list(glob.iglob(f"{base_path}/**/*(1).mp4", recursive=True))

In [35]:
# Permanently delete every path collected in file_names (os.remove cannot be undone)
for path_to_delete in file_names:
    os.remove(path_to_delete)