In [1]:
#script to read in a directory of files and extract date_created for JPGs

import os
import exifread
import pandas as pd

In [2]:
#define funcitons

def get_creation_date(file_path):
    try:
        with open(file_path, 'rb') as f:
            tags = exifread.process_file(f, details=False, stop_tag='EXIF DateTimeOriginal')
            if 'EXIF DateTimeOriginal' in tags:
                return tags['EXIF DateTimeOriginal']
            else:
                return None
    except Exception as e:
        return str(e)

def extract_metadata_from_directory(root_directory):
    file_list = []
    for root, _, files in os.walk(root_directory):
        for file_name in files:
            if file_name.lower().endswith('.jpg'):
                file_path = os.path.join(root, file_name)
                creation_date = get_creation_date(file_path)
                file_list.append({
                    'Filename': file_name,
                    'Full Path': file_path,
                    'Creation Date': creation_date
                })
    return file_list

In [4]:
# Set directory path
root_directory = '/Users/caraappel/Documents/CV4E/data'
metadata_list = extract_metadata_from_directory(root_directory)

# Create a Pandas DataFrame from the list of metadata
df = pd.DataFrame(metadata_list)

# Display the DataFrame
print(df)

# Optionally, you can save the DataFrame to a CSV file
df.to_csv('/Users/caraappel/Documents/CV4E/oregon_critters/dataset_date_created_exif.csv', index=False)



                                           Filename  \
0       GRID_05_05B__2020-04-13__06-21-37(3)_aa.JPG   
1       GRID_05_05B__2020-04-17__19-28-39(3)_aa.JPG   
2       GRID_05_05A__2021-10-31__11-43-44(1)_aa.JPG   
3       GRID_05_05B__2020-04-17__19-28-39(2)_aa.JPG   
4       GRID_05_05B__2020-04-13__06-21-36(1)_aa.JPG   
...                                             ...   
119567                Drive_06_03140025_3801_jn.JPG   
119568                   Drive_03_RCNX0050_1_jn.JPG   
119569                Drive_06_04150135_4035_jn.JPG   
119570                     Drive_05_RCNX0122_jn.JPG   
119571                     Drive_01_07040082_jn.JPG   

                                                Full Path        Creation Date  
0       /Users/caraappel/Documents/CV4E/data/HJA_GRID/...  2020:04:13 06:21:37  
1       /Users/caraappel/Documents/CV4E/data/HJA_GRID/...  2020:04:17 19:28:39  
2       /Users/caraappel/Documents/CV4E/data/HJA_GRID/...  2021:10:31 11:43:44  
3       /Users/