In [1]:
import json
import os

import ijson
import numpy as np
import pandas as pd

try:
    # Public location since pandas 1.0.
    from pandas import json_normalize
except ImportError:
    # Older pandas releases only expose the function under pandas.io.json.
    from pandas.io.json import json_normalize

In [2]:
# Locations of the input response files (paths are relative, starting with './inputs').
# The two Google paths are passed to GoogleJSONParser, the AWS path to AmazonJSONParser.
google_celebrity_json_path='./inputs/Google/FRIENDS - Hulu (Japan)_celebrity.json'
google_logo_json_path='./inputs/Google/FRIENDS - Hulu (Japan)_logo.json'
aws_json_path = './inputs/Amazon/FRIENDS_Hulu_Japan.json'

# Google Response Parser:

In [3]:
class GoogleJSONParser:
    """Parse Google response JSON files into pandas DataFrames.

    Only celebrity recognition is implemented.  The remaining ``get_*_info``
    methods are placeholders that return ``None``.
    """

    # ijson prefix that selects every celebrity track of every annotation result.
    _CELEBRITY_TRACKS_PREFIX = ('annotation_results.item.celebrity_recognition_annotations.'
                                'celebrity_tracks.item')

    def __init__(self, google_celebrity_json_path, google_logo_json_path):
        """Remember the paths of the celebrity and logo response files (nothing is read here)."""
        self.google_celebrity_json_path = google_celebrity_json_path
        self.google_logo_json_path = google_logo_json_path

    @staticmethod
    def _offset_seconds(frame, prefix):
        """Return ``<prefix>.seconds + <prefix>.nanos / 1e9`` as a float Series.

        A part that is NaN in a row, or whose column is absent from ``frame``
        altogether, counts as 0.
        """
        def part(unit):
            column = prefix + '.' + unit
            if column not in frame.columns:
                return pd.Series(0.0, index=frame.index)
            return pd.to_numeric(frame[column]).fillna(0)

        return part('seconds') + part('nanos') / 1e9

    def get_celebrity_info(self):
        """Read the celebrity response and return ``(segment_df, timestamp_df)``.

        ``segment_df`` has one row per (track, candidate celebrity); tracks
        without a candidate keep a single row with NaN ``name``/``url``/
        ``confidence``.  Its columns include ``source`` (always ``'google'``),
        ``id`` (position of the track in the file), ``name``, ``url``,
        ``confidence``, ``segment.start_time`` and ``segment.end_time``
        (seconds as floats).

        ``timestamp_df`` has one row per timestamped object of every row that
        has both ``name`` and ``url``.  It carries the flattened fields of the
        object plus ``timestamp`` (seconds as a float).

        Fields missing from every track (no candidate at all, no ``seconds`` or
        ``nanos`` component, no timestamped objects) and an empty track list are
        handled: absent time components count as 0 and both frames may be empty.
        Both frames use a fresh 0..n-1 index.
        """
        with open(self.google_celebrity_json_path, 'r') as f:
            celebrity_tracks_list = list(ijson.items(f, self._CELEBRITY_TRACKS_PREFIX))

        df = json_normalize(celebrity_tracks_list)
        # A field that no track carries produces no column at all; create it so
        # the steps below do not fail with KeyError.
        for optional in ('celebrities', 'face_track.timestamped_objects'):
            if optional not in df.columns:
                df[optional] = np.nan
        df['source'] = 'google'
        df['id'] = np.arange(len(df))

        # handle celebrities: one row per candidate, relabelled so that no index
        # value repeats when a track lists several candidates.
        df = df.explode('celebrities').reset_index(drop=True)
        candidates = df['celebrities']
        df['name'] = candidates.apply(lambda x: x['celebrity']['display_name'] if pd.notna(x) else x)
        df['url'] = candidates.apply(lambda x: x['celebrity']['name'] if pd.notna(x) else x)
        df['confidence'] = candidates.apply(lambda x: x['confidence'] if pd.notna(x) else x)
        df = df.drop(columns='celebrities')

        # handle segment times
        df['segment.start_time'] = self._offset_seconds(df, 'face_track.segment.start_time_offset')
        df['segment.end_time'] = self._offset_seconds(df, 'face_track.segment.end_time_offset')
        df = df.drop(columns=['face_track.segment.start_time_offset.seconds',
                              'face_track.segment.start_time_offset.nanos',
                              'face_track.segment.end_time_offset.seconds',
                              'face_track.segment.end_time_offset.nanos'],
                     errors='ignore')
        segment_df = df.drop(columns='face_track.timestamped_objects')

        # handle timestamped objects: only rows with a named celebrity, and only
        # cells that actually hold an object after exploding.
        objects_col = 'face_track.timestamped_objects'
        timestamp_df = (df.drop(columns=['segment.start_time', 'segment.end_time'])
                          .dropna(subset=['name', 'url'])
                          .explode(objects_col)
                          .dropna(subset=[objects_col])
                          .reset_index(drop=True))
        # Passing a list gives `temp` a 0..n-1 index that lines up with timestamp_df.
        temp = json_normalize(timestamp_df[objects_col].tolist())
        temp['timestamp'] = self._offset_seconds(temp, 'time_offset')
        temp = temp.drop(columns=['time_offset.seconds', 'time_offset.nanos'], errors='ignore')
        timestamp_df = timestamp_df.drop(columns=objects_col).join(temp)
        return segment_df, timestamp_df

    def get_logo_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_label_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_text_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_speech_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_shots_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_content_moderation_info(self):
        """Placeholder: not implemented, returns ``None``."""


In [4]:
# Parse the Google celebrity response; get_celebrity_info returns a
# (segment-level frame, per-timestamp frame) pair.
googleParser = GoogleJSONParser(google_celebrity_json_path, google_logo_json_path)
gcp_segment_df, gcp_timestamp_df = googleParser.get_celebrity_info()
# Bare last expression: rendered as this cell's output.
gcp_segment_df

Unnamed: 0,source,id,name,url,confidence,segment.start_time,segment.end_time
0,google,0,Courteney Cox,video-intelligence//m/01rr9f,0.82868701,0.0,1.126126
1,google,1,Matt LeBlanc,video-intelligence//m/01rrd4,0.80535632,0.0,1.126126
2,google,2,,,,0.0,1.126126
3,google,3,,,,0.0,1.126126
4,google,4,Lisa Kudrow,video-intelligence//m/016tb7,0.81934357,0.0,1.126126
5,google,5,,,,1.251251,1.501501
6,google,6,,,,1.501501,1.876876
7,google,7,,,,1.751751,2.377377
8,google,8,,,,2.127127,2.377377
9,google,9,,,,2.627627,2.877877


In [5]:
# Per-timestamp Google rows (only tracks that have both a name and a url).
gcp_timestamp_df

Unnamed: 0,source,id,name,url,confidence,normalized_bounding_box.left,normalized_bounding_box.top,normalized_bounding_box.right,normalized_bounding_box.bottom,timestamp
0,google,0,Courteney Cox,video-intelligence//m/01rr9f,0.82868701,0.62083334,0.40925926,0.72500002,0.6259259,0.000000
1,google,0,Courteney Cox,video-intelligence//m/01rr9f,0.82868701,0.61979169,0.40925926,0.72604167,0.62777776,0.125125
2,google,0,Courteney Cox,video-intelligence//m/01rr9f,0.82868701,0.61874998,0.41111112,0.72604167,0.63148147,0.250250
3,google,0,Courteney Cox,video-intelligence//m/01rr9f,0.82868701,0.61874998,0.41296297,0.72500002,0.63333333,0.375375
4,google,0,Courteney Cox,video-intelligence//m/01rr9f,0.82868701,0.61770833,0.41296297,0.72395831,0.63333333,0.500500
...,...,...,...,...,...,...,...,...,...,...
112,google,49,Matt LeBlanc,video-intelligence//m/01rrd4,0.81547713,0.12291667,0.13148148,0.21250001,0.31851852,26.901901
113,google,49,Matt LeBlanc,video-intelligence//m/01rrd4,0.81547713,0.12916666,0.13518518,0.21770833,0.31851852,27.027027
114,google,49,Matt LeBlanc,video-intelligence//m/01rrd4,0.81547713,0.14166667,0.14259259,0.22812501,0.32037038,27.152152
115,google,49,Matt LeBlanc,video-intelligence//m/01rrd4,0.81547713,0.15520833,0.14814815,0.24062499,0.32592592,27.277277


# Amazon Response Parser:

In [6]:
class AmazonJSONParser:
    """Parse an Amazon response JSON file into pandas DataFrames.

    Only celebrity recognition is implemented.  The remaining ``get_*_info``
    methods are placeholders that return ``None``.
    """

    # Flattened per-face details that the result does not keep.
    _UNUSED_COLUMNS = ['Celebrity.Face.Landmarks', 'Celebrity.Face.Pose.Roll',
                       'Celebrity.Face.Pose.Yaw', 'Celebrity.Face.Pose.Pitch',
                       'Celebrity.Face.Quality.Brightness', 'Celebrity.Face.Quality.Sharpness',
                       'Celebrity.Face.Confidence']

    # Columns returned when the file contains no celebrity detection at all.
    _OUTPUT_COLUMNS = ['url', 'name', 'id', 'confidence',
                       'normalized_bounding_box.left', 'normalized_bounding_box.top',
                       'normalized_bounding_box.right', 'normalized_bounding_box.bottom',
                       'timestamp', 'source']

    def __init__(self, aws_json_path):
        """Remember the path of the response file (nothing is read here)."""
        self.aws_json_path = aws_json_path

    def get_celebrity_info(self):
        """Read the ``Celebrities`` items and return one row per detection.

        The returned frame renames the flattened fields to ``url``, ``name``,
        ``id``, ``confidence``, ``normalized_bounding_box.left`` and
        ``normalized_bounding_box.top``, and adds:

        * ``normalized_bounding_box.right``  = ``Left + Width``
        * ``normalized_bounding_box.bottom`` = ``Top + Height``
        * ``timestamp`` = ``Timestamp / 1000``
        * ``source``    = ``'amazon'``

        An empty ``Celebrities`` list yields an empty frame with those columns.
        The discarded face-detail columns are dropped only where present, so a
        response that lacks some of them no longer raises ``KeyError``.
        """
        with open(self.aws_json_path, 'r') as f:
            celebrity_tracks_list = list(ijson.items(f, 'Celebrities.item'))
        if not celebrity_tracks_list:
            return pd.DataFrame(columns=self._OUTPUT_COLUMNS)

        df = json_normalize(celebrity_tracks_list).drop(columns=self._UNUSED_COLUMNS,
                                                        errors='ignore')

        # The box is given as left/top corner plus size; derive the opposite corner.
        df['normalized_bounding_box.right'] = (df['Celebrity.Face.BoundingBox.Left']
                                               + df['Celebrity.Face.BoundingBox.Width'])
        df['normalized_bounding_box.bottom'] = (df['Celebrity.Face.BoundingBox.Top']
                                                + df['Celebrity.Face.BoundingBox.Height'])
        df['timestamp'] = df['Timestamp'] / 1000
        df['source'] = 'amazon'

        df = df.drop(columns=['Celebrity.Face.BoundingBox.Width',
                              'Celebrity.Face.BoundingBox.Height',
                              'Timestamp'])

        return df.rename(columns={"Celebrity.Urls": "url",
                                  "Celebrity.Name": "name",
                                  "Celebrity.Id": "id",
                                  "Celebrity.Confidence": "confidence",
                                  "Celebrity.Face.BoundingBox.Top": "normalized_bounding_box.top",
                                  "Celebrity.Face.BoundingBox.Left": "normalized_bounding_box.left"})

    def get_logo_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_label_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_text_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_speech_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_shots_info(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_content_moderation_info(self):
        """Placeholder: not implemented, returns ``None``."""


In [7]:
# Parse the Amazon response into one row per timestamped celebrity detection.
amazonParser = AmazonJSONParser(aws_json_path)
aws_timestamp_df = amazonParser.get_celebrity_info()
# Bare last expression: rendered as this cell's output.
aws_timestamp_df

Unnamed: 0,url,name,id,confidence,normalized_bounding_box.left,normalized_bounding_box.top,normalized_bounding_box.right,normalized_bounding_box.bottom,timestamp,source
0,[www.imdb.com/name/nm0001612],Matthew Perry,2i3Ga9e,99.0,0.4447916746139526,0.1074074059724807,0.5312500074505806,0.2611111104488372,0.0,amazon
1,[www.imdb.com/name/nm0001455],Matt LeBlanc,43be2M,91.0,0.559374988079071,0.0611111111938953,0.6510416567325591,0.2240740694105625,0.0,amazon
2,[www.imdb.com/name/nm0001612],Matthew Perry,2i3Ga9e,100.0,0.421875,0.1259259283542633,0.513541668653488,0.2888888865709304,0.458,amazon
3,[www.imdb.com/name/nm0001455],Matt LeBlanc,43be2M,100.0,0.5583333373069763,0.0685185194015502,0.6500000059604644,0.2314814776182174,0.458,amazon
4,[www.imdb.com/name/nm0001612],Matthew Perry,2i3Ga9e,50.0,0.4364583194255829,0.116666667163372,0.5260416492819786,0.2759259268641472,0.959,amazon
5,[www.imdb.com/name/nm0001455],Matt LeBlanc,43be2M,97.0,0.5552083253860474,0.0611111111938953,0.6489583253860474,0.2277777828276157,0.959,amazon
6,[www.imdb.com/name/nm0001435],Lisa Kudrow,2aM9z3i,83.0,0.6864583492279053,0.1111111119389534,0.8250000178813934,0.3574074134230613,3.962,amazon
7,[www.imdb.com/name/nm0001435],Lisa Kudrow,2aM9z3i,57.0,0.7552083134651184,0.0722222253680229,0.8895833194255828,0.3111111149191856,4.462,amazon
8,[www.imdb.com/name/nm0001435],Lisa Kudrow,2aM9z3i,50.0,0.7927083373069763,0.0648148134350776,0.9270833432674408,0.3037037029862404,4.963,amazon
9,[],Caroline Garcia,1eB9rt2,50.0,0.3947916626930237,0.3222222328186035,0.4614583328366279,0.4407407492399215,9.467,amazon


# Simple concatenation of the Google and Amazon timestamped celebrity info:

In [8]:
# Stack both providers' per-timestamp rows (columns sorted by name, fresh index)
# and start with every row flagged as not yet attached to a segment.
merged_timestamps = pd.concat(
    [gcp_timestamp_df, aws_timestamp_df],
    sort=True,
    ignore_index=True,
).assign(added_to_segment=False)
merged_timestamps

Unnamed: 0,confidence,id,name,normalized_bounding_box.bottom,normalized_bounding_box.left,normalized_bounding_box.right,normalized_bounding_box.top,source,timestamp,url,added_to_segment
0,0.82868701,0,Courteney Cox,0.6259259,0.62083334,0.72500002,0.40925926,google,0.000000,video-intelligence//m/01rr9f,False
1,0.82868701,0,Courteney Cox,0.62777776,0.61979169,0.72604167,0.40925926,google,0.125125,video-intelligence//m/01rr9f,False
2,0.82868701,0,Courteney Cox,0.63148147,0.61874998,0.72604167,0.41111112,google,0.250250,video-intelligence//m/01rr9f,False
3,0.82868701,0,Courteney Cox,0.63333333,0.61874998,0.72500002,0.41296297,google,0.375375,video-intelligence//m/01rr9f,False
4,0.82868701,0,Courteney Cox,0.63333333,0.61770833,0.72395831,0.41296297,google,0.500500,video-intelligence//m/01rr9f,False
...,...,...,...,...,...,...,...,...,...,...,...
143,90,3B1Ks1L,Courteney Cox,0.41666667163372042,0.59375,0.69895832985639572,0.22962963581085206,amazon,20.478000,[www.imdb.com/name/nm0001073],False
144,99,2PL7Jp7P,Rohit Roy,0.25185184925794602,0.6635416746139526,0.73229167610406872,0.12962962687015534,amazon,25.483000,[www.imdb.com/name/nm1123937],False
145,54.000003814697269,2PL7Jp7P,Rohit Roy,0.24259259551763534,0.6177083253860474,0.68854165822267536,0.11666666716337204,amazon,25.984000,[www.imdb.com/name/nm1123937],False
146,100,43be2M,Matt LeBlanc,0.30185185372829439,0.14270833134651185,0.21666666120290757,0.17037037014961244,amazon,26.985000,[www.imdb.com/name/nm0001455],False


# Map timestamps to segments:
The 'added_to_segment' field tracks whether a timestamped value has been mapped to any segment.

In [9]:
def add_timestamp_objects(row, timestamps=None):
    """Collect the timestamped detections that fall inside one segment.

    A detection matches when its ``name`` equals ``row['name']`` and its
    ``timestamp`` lies within ``row['segment.start_time']`` ..
    ``row['segment.end_time']`` (both ends inclusive).  Matching rows are
    flagged in place by setting their ``added_to_segment`` value to ``True``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Segment providing ``name``, ``segment.start_time`` and ``segment.end_time``.
    timestamps : pandas.DataFrame, optional
        Frame to search and flag.  Defaults to the notebook-level
        ``merged_timestamps``, so ``DataFrame.apply(add_timestamp_objects, axis=1)``
        keeps working unchanged.

    Returns
    -------
    list of dict
        The matching rows as records (the flag column is included); an empty
        list when nothing matches.
    """
    if timestamps is None:
        timestamps = merged_timestamps

    same_name = timestamps['name'] == row['name']
    after_start = timestamps['timestamp'] >= row['segment.start_time']
    before_end = timestamps['timestamp'] <= row['segment.end_time']
    condition = same_name & after_start & before_end

    # Flag first so that the returned records already show added_to_segment=True.
    timestamps.loc[condition, 'added_to_segment'] = True
    return timestamps.loc[condition].to_dict('records')

# Attach to every Google segment row the list of matching timestamped detections.
# Side effect: add_timestamp_objects flags the matched rows of merged_timestamps.
gcp_segment_df['timestamped_instances'] = gcp_segment_df.apply(add_timestamp_objects, axis=1)
gcp_segment_df

Unnamed: 0,source,id,name,url,confidence,segment.start_time,segment.end_time,timestamped_instances
0,google,0,Courteney Cox,video-intelligence//m/01rr9f,0.82868701,0.0,1.126126,"[{'confidence': 0.82868701, 'id': 0, 'name': '..."
1,google,1,Matt LeBlanc,video-intelligence//m/01rrd4,0.80535632,0.0,1.126126,"[{'confidence': 0.80535632, 'id': 1, 'name': '..."
2,google,2,,,,0.0,1.126126,[]
3,google,3,,,,0.0,1.126126,[]
4,google,4,Lisa Kudrow,video-intelligence//m/016tb7,0.81934357,0.0,1.126126,"[{'confidence': 0.81934357, 'id': 4, 'name': '..."
5,google,5,,,,1.251251,1.501501,[]
6,google,6,,,,1.501501,1.876876,[]
7,google,7,,,,1.751751,2.377377,[]
8,google,8,,,,2.127127,2.377377,[]
9,google,9,,,,2.627627,2.877877,[]


# Timestamped values not mapped to any segment:
 As expected, only some Amazon timestamped values are left unmapped.

In [10]:
# Independent copy of the rows no segment has claimed (the flag column is boolean,
# so negating it selects the rows that are still False).
unmapped_timestamps = merged_timestamps.loc[~merged_timestamps['added_to_segment']].copy()
unmapped_timestamps

Unnamed: 0,confidence,id,name,normalized_bounding_box.bottom,normalized_bounding_box.left,normalized_bounding_box.right,normalized_bounding_box.top,source,timestamp,url,added_to_segment
117,99.0,2i3Ga9e,Matthew Perry,0.2611111104488372,0.4447916746139526,0.5312500074505806,0.1074074059724807,amazon,0.0,[www.imdb.com/name/nm0001612],False
119,100.0,2i3Ga9e,Matthew Perry,0.2888888865709304,0.421875,0.513541668653488,0.1259259283542633,amazon,0.458,[www.imdb.com/name/nm0001612],False
121,50.0,2i3Ga9e,Matthew Perry,0.2759259268641472,0.4364583194255829,0.5260416492819786,0.116666667163372,amazon,0.959,[www.imdb.com/name/nm0001612],False
126,50.0,1eB9rt2,Caroline Garcia,0.4407407492399215,0.3947916626930237,0.4614583328366279,0.3222222328186035,amazon,9.467,[],False
127,75.0,1eB9rt2,Caroline Garcia,0.4370370209217071,0.4135416746139526,0.4770833402872085,0.3240740597248077,amazon,9.968,[],False
131,94.0,3qA5zC3U,William Lucas,0.3203703761100769,0.2541666626930237,0.3395833298563957,0.1703703701496124,amazon,11.97,[www.imdb.com/name/nm0524308],False
134,91.0,2i3Ga9e,Matthew Perry,0.4722222238779068,0.4208333194255829,0.5687499791383743,0.2074074000120163,amazon,14.973,[www.imdb.com/name/nm0001612],False
135,88.0,2i3Ga9e,Matthew Perry,0.4703703671693802,0.4250000119209289,0.5708333402872084,0.2111111134290695,amazon,15.473,[www.imdb.com/name/nm0001612],False
136,99.0,1lF9mf8,David Schwimmer,0.4574074149131775,0.3343749940395355,0.4697916656732559,0.2166666686534881,amazon,18.018,[www.imdb.com/name/nm0001710],False
137,100.0,1lF9mf8,David Schwimmer,0.4388888925313949,0.3479166626930237,0.4833333343267441,0.1981481462717056,amazon,18.476,[www.imdb.com/name/nm0001710],False


### The first three Matthew Perry rows belong to the same shot and should be combined into one segment; for now we continue without combining them. We also still need to map these rows to the anonymous face tracks from Google, based on bounding-box locations and timestamps.

In [11]:
# Give every unmapped row a one-element 'timestamped_instances' list holding the
# full row as a dict, then remove the per-instance columns from the row itself.
unmapped_timestamps = unmapped_timestamps.assign(
    timestamped_instances=unmapped_timestamps.apply(lambda record: [record.to_dict()], axis=1)
).drop(
    columns=[
        'normalized_bounding_box.bottom',
        'normalized_bounding_box.left',
        'normalized_bounding_box.right',
        'normalized_bounding_box.top',
        'added_to_segment',
        'timestamp',
    ]
)
unmapped_timestamps

Unnamed: 0,confidence,id,name,source,url,timestamped_instances
117,99.0,2i3Ga9e,Matthew Perry,amazon,[www.imdb.com/name/nm0001612],"[{'confidence': 99, 'id': '2i3Ga9e', 'name': '..."
119,100.0,2i3Ga9e,Matthew Perry,amazon,[www.imdb.com/name/nm0001612],"[{'confidence': 100, 'id': '2i3Ga9e', 'name': ..."
121,50.0,2i3Ga9e,Matthew Perry,amazon,[www.imdb.com/name/nm0001612],"[{'confidence': 50, 'id': '2i3Ga9e', 'name': '..."
126,50.0,1eB9rt2,Caroline Garcia,amazon,[],"[{'confidence': 50, 'id': '1eB9rt2', 'name': '..."
127,75.0,1eB9rt2,Caroline Garcia,amazon,[],"[{'confidence': 75, 'id': '1eB9rt2', 'name': '..."
131,94.0,3qA5zC3U,William Lucas,amazon,[www.imdb.com/name/nm0524308],"[{'confidence': 94, 'id': '3qA5zC3U', 'name': ..."
134,91.0,2i3Ga9e,Matthew Perry,amazon,[www.imdb.com/name/nm0001612],"[{'confidence': 91, 'id': '2i3Ga9e', 'name': '..."
135,88.0,2i3Ga9e,Matthew Perry,amazon,[www.imdb.com/name/nm0001612],"[{'confidence': 88, 'id': '2i3Ga9e', 'name': '..."
136,99.0,1lF9mf8,David Schwimmer,amazon,[www.imdb.com/name/nm0001710],"[{'confidence': 99, 'id': '1lF9mf8', 'name': '..."
137,100.0,1lF9mf8,David Schwimmer,amazon,[www.imdb.com/name/nm0001710],"[{'confidence': 100, 'id': '1lF9mf8', 'name': ..."


# Concatenating segments DF and unmapped timestamps:

In [12]:
# Keep only the Google segments that carry a recognised celebrity, then append
# the unmapped detections as additional rows.
gcp_segment_df = gcp_segment_df.dropna(subset=['name', 'url'])
merged_segments = pd.concat(
    [gcp_segment_df, unmapped_timestamps],
    sort=True,
    ignore_index=True,
)
merged_segments

Unnamed: 0,confidence,id,name,segment.end_time,segment.start_time,source,timestamped_instances,url
0,0.82868701,0,Courteney Cox,1.126126,0.0,google,"[{'confidence': 0.82868701, 'id': 0, 'name': '...",video-intelligence//m/01rr9f
1,0.80535632,1,Matt LeBlanc,1.126126,0.0,google,"[{'confidence': 0.80535632, 'id': 1, 'name': '...",video-intelligence//m/01rrd4
2,0.81934357,4,Lisa Kudrow,1.126126,0.0,google,"[{'confidence': 0.81934357, 'id': 4, 'name': '...",video-intelligence//m/016tb7
3,0.79269069,10,Courteney Cox,4.004004,3.378378,google,"[{'confidence': 0.79269069, 'id': 10, 'name': ...",video-intelligence//m/01rr9f
4,0.83260906,11,Lisa Kudrow,5.38038,3.378378,google,"[{'confidence': 0.83260906, 'id': 11, 'name': ...",video-intelligence//m/016tb7
5,0.81472307,26,Matt LeBlanc,11.511511,10.26026,google,"[{'confidence': 0.81472307, 'id': 26, 'name': ...",video-intelligence//m/01rrd4
6,0.76948714,28,Matt LeBlanc,12.387387,11.636636,google,"[{'confidence': 0.76948714, 'id': 28, 'name': ...",video-intelligence//m/01rrd4
7,0.82898712,30,Matt LeBlanc,14.764764,13.763763,google,"[{'confidence': 0.82898712, 'id': 30, 'name': ...",video-intelligence//m/01rrd4
8,0.80780369,36,Courteney Cox,20.77077,18.018018,google,"[{'confidence': 0.80780369, 'id': 36, 'name': ...",video-intelligence//m/01rr9f
9,0.76334,43,Courteney Cox,26.026026,25.15015,google,"[{'confidence': 0.76334, 'id': 43, 'name': 'Co...",video-intelligence//m/01rr9f


# Creating merged json:

In [13]:
# Round-trip through to_json so the records become plain JSON-compatible objects,
# then write them to out.json with sorted keys and 4-space indentation.
merged_celebrity_json = {
    'celebrities': json.loads(merged_segments.to_json(orient='records')),
}
with open('out.json', "w") as write_file:
    write_file.write(json.dumps(merged_celebrity_json, indent=4, sort_keys=True))

In [14]:
# Dump the complete merged structure as plain text (this output is very long).
print(merged_celebrity_json)

{'celebrities': [{'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'segment.end_time': 1.126126, 'segment.start_time': 0.0, 'source': 'google', 'timestamped_instances': [{'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'normalized_bounding_box.bottom': 0.6259259, 'normalized_bounding_box.left': 0.62083334, 'normalized_bounding_box.right': 0.72500002, 'normalized_bounding_box.top': 0.40925926, 'source': 'google', 'timestamp': 0.0, 'url': 'video-intelligence//m/01rr9f', 'added_to_segment': True}, {'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'normalized_bounding_box.bottom': 0.62777776, 'normalized_bounding_box.left': 0.61979169, 'normalized_bounding_box.right': 0.72604167, 'normalized_bounding_box.top': 0.40925926, 'source': 'google', 'timestamp': 0.125125, 'url': 'video-intelligence//m/01rr9f', 'added_to_segment': True}, {'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'normalized_bounding_box.bottom': 0.63148147, 'normalized_bounding_b

In [17]:
# Take the first merged celebrity record as the sample used by the cells below.
temp = merged_celebrity_json['celebrities'][0]
temp

{'confidence': 0.82868701,
 'id': 0,
 'name': 'Courteney Cox',
 'segment.end_time': 1.126126,
 'segment.start_time': 0.0,
 'source': 'google',
 'timestamped_instances': [{'confidence': 0.82868701,
   'id': 0,
   'name': 'Courteney Cox',
   'normalized_bounding_box.bottom': 0.6259259,
   'normalized_bounding_box.left': 0.62083334,
   'normalized_bounding_box.right': 0.72500002,
   'normalized_bounding_box.top': 0.40925926,
   'source': 'google',
   'timestamp': 0.0,
   'url': 'video-intelligence//m/01rr9f',
   'added_to_segment': True},
  {'confidence': 0.82868701,
   'id': 0,
   'name': 'Courteney Cox',
   'normalized_bounding_box.bottom': 0.62777776,
   'normalized_bounding_box.left': 0.61979169,
   'normalized_bounding_box.right': 0.72604167,
   'normalized_bounding_box.top': 0.40925926,
   'source': 'google',
   'timestamp': 0.125125,
   'url': 'video-intelligence//m/01rr9f',
   'added_to_segment': True},
  {'confidence': 0.82868701,
   'id': 0,
   'name': 'Courteney Cox',
   'norma

In [20]:
# Pair every value of the sample record with its key split on '.'.
split_dict = [
    [dotted_key.split('.'), value]
    for dotted_key, value in temp.items()
]

split_dict

[[['confidence'], 0.82868701],
 [['id'], 0],
 [['name'], 'Courteney Cox'],
 [['segment', 'end_time'], 1.126126],
 [['segment', 'start_time'], 0.0],
 [['source'], 'google'],
 [['timestamped_instances'],
  [{'confidence': 0.82868701,
    'id': 0,
    'name': 'Courteney Cox',
    'normalized_bounding_box.bottom': 0.6259259,
    'normalized_bounding_box.left': 0.62083334,
    'normalized_bounding_box.right': 0.72500002,
    'normalized_bounding_box.top': 0.40925926,
    'source': 'google',
    'timestamp': 0.0,
    'url': 'video-intelligence//m/01rr9f',
    'added_to_segment': True},
   {'confidence': 0.82868701,
    'id': 0,
    'name': 'Courteney Cox',
    'normalized_bounding_box.bottom': 0.62777776,
    'normalized_bounding_box.left': 0.61979169,
    'normalized_bounding_box.right': 0.72604167,
    'normalized_bounding_box.top': 0.40925926,
    'source': 'google',
    'timestamp': 0.125125,
    'url': 'video-intelligence//m/01rr9f',
    'added_to_segment': True},
   {'confidence': 0.82

In [23]:
from collections import defaultdict 
from functools import reduce
from operator import getitem 
  
  
def getFromDict(dataDict, mapList):
    """Follow the keys in ``mapList`` one after another, starting at ``dataDict``.

    Returns the object reached after the last lookup; with an empty
    ``mapList`` that is ``dataDict`` itself.
    """
    node = dataDict
    for key in mapList:
        node = node[key]
    return node

def tree():
    """Return a defaultdict whose missing keys are created as new trees of the same kind."""
    return defaultdict(tree)


d = tree()
  
def default_to_regular(d):
    """Convert a defaultdict, and every defaultdict nested as a value, into plain dicts.

    Anything that is not a defaultdict is returned unchanged (and not descended into).
    """
    if not isinstance(d, defaultdict):
        return d
    return {key: default_to_regular(value) for key, value in d.items()}

# Rebuild the sample record with dotted top-level keys turned into nested dicts:
# everything before the last '.' is the path inside the tree, the rest is the leaf key.
# Only the keys of `temp` itself are split; the values are stored as they are.
for k, v in temp.items():
    *keys, final_key = k.split('.')
    reduce(getitem, keys, d)[final_key] = v

print(default_to_regular(d))

{'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'segment': {'end_time': 1.126126, 'start_time': 0.0}, 'source': 'google', 'timestamped_instances': [{'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'normalized_bounding_box.bottom': 0.6259259, 'normalized_bounding_box.left': 0.62083334, 'normalized_bounding_box.right': 0.72500002, 'normalized_bounding_box.top': 0.40925926, 'source': 'google', 'timestamp': 0.0, 'url': 'video-intelligence//m/01rr9f', 'added_to_segment': True}, {'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'normalized_bounding_box.bottom': 0.62777776, 'normalized_bounding_box.left': 0.61979169, 'normalized_bounding_box.right': 0.72604167, 'normalized_bounding_box.top': 0.40925926, 'source': 'google', 'timestamp': 0.125125, 'url': 'video-intelligence//m/01rr9f', 'added_to_segment': True}, {'confidence': 0.82868701, 'id': 0, 'name': 'Courteney Cox', 'normalized_bounding_box.bottom': 0.63148147, 'normalized_bounding_box.left': 0.61874998