In [11]:
'''
This Jupyter notebook performs data transformation tasks, including:
- Parsing JSON data into a pandas DataFrame and exporting it to a CSV file
- Consolidating and cleaning relevant data for visualization

This analysis is done without AI.
'''
import pandas as pd

# Data Transformation

### Step 1: Convert JSON to CSV

In [None]:
# Step 1: load the photo metadata from JSON into a DataFrame, then save a
# CSV copy of it (index=False leaves the row index out of the file)
photos_metadata = pd.read_json(path_or_buf='photos_metadata.json')
photos_metadata.to_csv(path_or_buf='photos_metadata.csv', index=False)

Unnamed: 0,adjustment_type,adjustments,album_info,albums,burst,burst_album_info,burst_albums,burst_default_pick,burst_key,burst_photos,...,title,tzname,tzoffset,uti,uti_edited,uti_original,uti_raw,uuid,visible,width
0,2,"{'adjustment_format_version': 1, 'adjustments'...",[],[],False,[],[],False,False,[],...,,GMT-0700,-25200.0,public.jpeg,public.jpeg,public.heic,,2BCE44A9-50D0-4256-825D-00C72E44F7EE,True,4032
1,0,{},[],[],False,[],[],False,False,[],...,,GMT-0700,-25200.0,public.heic,,public.heic,,97478BF8-AD47-4C4F-9F50-2FD2F049EC3B,True,3024
2,0,{},[],[],False,[],[],False,False,[],...,,,,public.jpeg,,public.jpeg,,679858E9-4FB9-4E01-92E6-00A663306AA6,True,960
3,0,{},[{'creation_date': '2025-04-21T13:58:37.272909...,[Dance Co World Tour],False,[{'creation_date': '2025-04-21T13:58:37.272909...,[Dance Co World Tour],False,False,[],...,,,,public.jpeg,,public.jpeg,,B98F71F2-E19F-4DB0-B2C9-36059C7D9618,False,1537
4,0,{},[{'creation_date': '2024-12-16T22:37:04.988281...,[WhatsApp],False,[{'creation_date': '2024-12-16T22:37:04.988281...,[WhatsApp],False,False,[],...,,America/Los_Angeles,-25200.0,public.jpeg,,public.jpeg,,2D5BB74A-9397-4BA0-9839-97D78E12C1A4,True,768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23201,0,{},[],[],False,[],[],False,False,[],...,,GMT-0700,-25200.0,public.heic,,public.heic,,50D08DB2-1C0E-4459-9DDB-1361A918AECE,True,3024
23202,2,"{'adjustment_format_version': None, 'adjustmen...",[],[],False,[],[],False,False,[],...,,America/Los_Angeles,-28800.0,public.jpeg,public.jpeg,public.png,,66062788-9D3F-49A7-948B-F1009358F3DA,True,750
23203,0,{},[],[],False,[],[],False,False,[],...,,America/Los_Angeles,-25200.0,public.jpeg,,public.jpeg,,ECD544F2-2B51-4D56-9018-54DFED8A2190,True,2448
23204,0,{},[],[],False,[],[],False,False,[],...,,America/New_York,-14400.0,public.jpeg,,public.jpeg,,9369DFB4-DAD0-4C96-ABDA-12624E8B0D06,True,960


### Step 2: Consolidate and Clean Data

In [None]:
# Step 2: reduce `photos_metadata` to the rows and columns used for visualization.
#
# Result: `consolidated_data`, sorted by `date_original`, where `date_original`
# is a 'YYYY-MM-DD HH:MM:SS' string (missing for values that could not be parsed).

# Show every column when a DataFrame is displayed (notebook-wide setting)
pd.set_option('display.max_columns', None)

# Keep only photos that I took or actively downloaded:
#   - rows with a non-null `owner` are dropped (photos I do not own)
#   - rows whose `folders` value is exactly {'WhatsApp': []} are dropped
#     (WhatsApp photos are downloaded automatically)
has_no_owner = photos_metadata['owner'].isnull()
not_whatsapp_download = photos_metadata['folders'] != {'WhatsApp': []}

# Columns most relevant for a visualization that reflects me personally
visualization_columns = [
    'date_original', 'favorite',
    'folders', 'hasadjustments', 'height', 'isphoto',
    'labels_normalized', 'latitude', 'longitude', 'live_photo',
    'original_filesize', 'original_orientation', 'original_width',
    'panorama', 'place', 'portrait', 'score', 'screen_recording',
    'screenshot', 'search_info_normalized', 'selfie', 'shared', 'owner',
]

# Select rows and columns in one step and take an explicit copy, so the column
# assignment below writes to an independent frame instead of a slice of
# `photos_metadata` (avoids pandas' SettingWithCopyWarning).
own_photos = photos_metadata.loc[
    has_no_owner & not_whatsapp_download, visualization_columns
].copy()

# Convert date to datetime format and write as YYYY-MM-DD HH:MM:SS.
# Only the first 19 characters of each string are parsed; values that cannot
# be parsed become missing (errors='coerce').
# NOTE(review): if the raw strings carry a UTC offset after the seconds, the
# slice discards it, so utc=True merely labels the local wall-clock time as
# UTC -- confirm this is intended.
parsed_dates = pd.to_datetime(
    own_photos['date_original'].str.slice(0, 19),
    errors='coerce',
    utc=True,
)
own_photos['date_original'] = parsed_dates.dt.strftime('%Y-%m-%d %H:%M:%S')

# Sort by date (the fixed-width format makes string order chronological)
consolidated_data = own_photos.sort_values(by='date_original')
consolidated_data

Unnamed: 0,date_original,favorite,folders,hasadjustments,height,isphoto,labels_normalized,latitude,longitude,live_photo,original_filesize,original_orientation,original_width,panorama,place,portrait,score,screen_recording,screenshot,search_info_normalized,selfie,shared,owner
12276,2009-06-16 17:33:54,False,{},False,480,True,"[art, manicure, nail polish]",,,False,152334,1,640,False,{},False,"{'behavioral': 0.7061946988105771, 'curation':...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
13828,2012-01-16 10:42:08,False,{},False,652,True,"[art, close-up, manicure, nail polish]",37.795928,-122.408822,False,53513,1,800,False,"{'address': {'city': 'San Francisco', 'country...",False,"{'behavioral': 0.7053539752960201, 'curation':...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
1812,2014-03-22 15:31:33,False,{},False,294,True,[],,,False,54484,1,241,False,{},False,"{'behavioral': 0.10000000149011601, 'curation'...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
12237,2014-03-22 15:31:33,False,{},False,294,True,[],,,False,54484,1,241,False,{},False,"{'behavioral': 0.10000000149011601, 'curation'...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
20020,2014-03-22 15:31:33,False,{},False,294,True,[],,,False,54484,1,241,False,{},False,"{'behavioral': 0.10000000149011601, 'curation'...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13677,2022-06-20 18:30:53,False,{},False,4032,True,"[art, building, ceiling, consumer electronics,...",37.721625,-121.875183,False,1926458,6,4032,False,"{'address': {'city': 'Dublin', 'country': 'Uni...",False,"{'behavioral': 0.704380512237548, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
19217,2022-06-20 18:47:00,False,{},False,1920,False,"[building, cabinet, ceiling, clothing, furnitu...",37.721600,-121.875200,False,12421778,1,1080,False,"{'address': {'city': 'Dublin', 'country': 'Uni...",False,"{'behavioral': 0.705265462398529, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
1686,2025-02-28 15:40:29,False,{},False,3024,True,"[clothing, jeans, people]",34.412645,-119.848070,False,0,1,4032,False,"{'address': {'city': 'Goleta', 'country': 'Uni...",False,"{'behavioral': 0.0, 'curation': 0.5, 'failure'...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
4098,2025-02-28 17:39:31,False,{},False,4032,True,"[cloudy, furniture, interior room, outdoor, pe...",34.413222,-119.855553,False,0,6,4032,False,"{'address': {'city': 'Goleta', 'country': 'Uni...",False,"{'behavioral': 0.0, 'curation': 0.5, 'failure'...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,


In [62]:
# Show the rows whose `folders` value is exactly {'Instagram': []}
in_instagram_folder = consolidated_data['folders'] == {'Instagram': []}
consolidated_data.loc[in_instagram_folder]

Unnamed: 0,date_original,favorite,folders,hasadjustments,height,isphoto,labels_normalized,latitude,longitude,live_photo,original_filesize,original_orientation,original_width,panorama,place,portrait,score,screen_recording,screenshot,search_info_normalized,selfie,shared,owner
12868,2018-06-03 16:38:03,False,{'Instagram': []},False,1922,True,"[stuffed animals, toy]",,,False,310070,1,1080,False,{},False,"{'behavioral': 0.702256619930267, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
15834,2018-06-19 11:18:14,False,{'Instagram': []},False,1922,True,[],,,False,178212,1,1080,False,{},False,"{'behavioral': 0.100309737026691, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
12259,2019-05-03 12:08:39,False,{'Instagram': []},False,2447,True,"[branch, foliage, outdoor, plant, sky]",37.305208,-121.998553,False,707511,1,2447,False,"{'address': {'city': 'San Jose', 'country': 'U...",False,"{'behavioral': 0.7030088305473321, 'curation':...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
9389,2019-06-16 22:28:25,False,{'Instagram': []},False,1366,True,"[clothing, footwear, shoes]",,,False,363671,1,768,False,{},False,"{'behavioral': 0.703539788722991, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
2004,2019-07-04 22:16:54,False,{'Instagram': []},False,720,False,"[building, darkness, outdoor, sky]",,,False,708614,1,720,False,{},False,"{'behavioral': 0.101681418716907, 'curation': ...",False,False,"{'activities': ['celebration', 'holiday'], 'bo...",False,False,
6015,2019-07-26 15:22:28,False,{'Instagram': []},False,2447,True,"[celestial body, darkness, outdoor, sky, star]",37.307512,-121.998153,False,1345038,1,2447,False,"{'address': {'city': 'San Jose', 'country': 'U...",False,"{'behavioral': 0.101283185184001, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
20771,2019-07-29 16:38:29,False,{'Instagram': []},False,1334,True,[document],,,False,149596,1,750,False,{},False,"{'behavioral': 0.7020796537399291, 'curation':...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
15912,2019-08-05 21:52:31,False,{'Instagram': []},False,812,True,"[bowling pin, recreation, sport, sports equipm...",37.3271,-122.013656,False,103754,1,812,False,"{'address': {'city': 'Cupertino', 'country': '...",False,"{'behavioral': 0.7053097486495971, 'curation':...",False,False,"{'activities': ['dining', 'dinner'], 'bodies_o...",False,False,
22295,2019-08-11 13:36:43,False,{'Instagram': []},False,1713,True,"[crepe myrtle, flower, foliage, plant]",37.30777,-121.998145,False,637597,1,1713,False,"{'address': {'city': 'San Jose', 'country': 'U...",False,"{'behavioral': 0.101592920720577, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
16891,2019-10-27 12:46:21,False,{'Instagram': []},False,1279,True,"[banner, clothing, crowd, headgear, people, sign]",,,False,273811,1,1279,False,{},False,"{'behavioral': 0.704247772693634, 'curation': ...",False,False,"{'activities': [], 'bodies_of_water': [], 'cam...",False,False,
