In [4]:
import os
import sys
sys.path.insert(0, os.path.abspath('../src'))

import copy
import re
from tqdm.notebook import tqdm

import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from frame_collector import FrameCollector
from src.connectors import antract_connector as antract
from src.utils.media_fragment import convert_to_seconds_npt

W0429 13:58:30.799485 4543577536 deprecation.py:506] From /usr/local/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [5]:
# SPARQL endpoint queried by every lookup in this notebook.
ENDPOINT = "https://okapi.ina.fr/antract/api/saphir/sparql_search"
# A single wrapper instance is shared by all query helpers below.
sparql = SPARQLWrapper(ENDPOINT)
# Ask for JSON so that results can be read as results["results"]["bindings"].
sparql.setReturnFormat(JSON)

In [6]:
# Load the celebrity labels, one per line. Blank lines (e.g. a trailing empty
# line) are skipped so that no empty label is later sent to the endpoint.
with open('celebrities.txt', encoding='utf-8') as file:
    celebrities_names = [line.strip() for line in file if line.strip()]
celebrities_names

['Ben Bella, Ahmed',
 'Gaulle, Charles de',
 'Eisenhower, Dwight David',
 "Elizabeth d'Angleterre",
 'Mitterrand, François',
 'Bidault, Georges',
 'Pompidou, Georges',
 'Mollet, Guy',
 'Adenauer, Konrad',
 'Khrouchtchev, Nikita',
 'Mendès France, Pierre',
 'Molotov, Viatcheslav',
 'Auriol, Vincent']

In [7]:
# SPARQL template used by get_celebrity_uri; it contains a `?label` placeholder.
# The encoding is given explicitly (as for celebrities.txt) so that decoding
# does not depend on the platform default.
# NOTE(review): `query` is rebound by later cells, so this cell must be re-run
# before calling get_celebrity_uri again.
with open('person_by_name.rq', encoding='utf-8') as file:
    query = file.read()

In [8]:
def get_celebrity_uri(name):
    """Return the URI of the person whose label is ``name``.

    The ``?label`` placeholder of the global ``query`` template is replaced by
    ``name`` as a double-quoted SPARQL string literal, the query is sent through
    the shared ``sparql`` wrapper and the ``person`` value of the first binding
    is returned.

    Args:
        name: label to look up (e.g. ``'Gaulle, Charles de'``).

    Returns:
        The ``person`` value of the first result binding.

    Raises:
        IndexError: if the endpoint returns no binding for ``name``.
    """
    # Escape backslashes and double quotes so that the label cannot terminate
    # the SPARQL string literal early.
    literal = name.replace('\\', '\\\\').replace('"', '\\"')
    q = query.replace('?label', '"%s"' % literal)
    sparql.setQuery(q)
    results = sparql.query().convert()["results"]["bindings"]
    if not results:
        # Same exception type as the bare results[0] lookup, but it names the label.
        raise IndexError('No person found for label %r' % name)
    return results[0]['person']['value']

# Resolve every celebrity label to its URI (one SPARQL query per label).
celebrities_uri = list(map(get_celebrity_uri, celebrities_names))

# Show the URI / label pairs as tab-separated lines.
for uri, name in zip(celebrities_uri, celebrities_names):
    print(uri, name, sep='\t')

http://www.ina.fr/thesaurus/pp/concept_10202345	Ben Bella, Ahmed
http://www.ina.fr/thesaurus/pp/concept_10075863	Gaulle, Charles de
http://www.ina.fr/thesaurus/pp/concept_10128605	Eisenhower, Dwight David
http://www.ina.fr/thesaurus/pp/concept_330024	Elizabeth d'Angleterre
http://www.ina.fr/thesaurus/pp/concept_10080266	Mitterrand, François
http://www.ina.fr/thesaurus/pp/concept_10217774	Bidault, Georges
http://www.ina.fr/thesaurus/pp/concept_10074163	Pompidou, Georges
http://www.ina.fr/thesaurus/pp/concept_10176011	Mollet, Guy
http://www.ina.fr/thesaurus/pp/concept_10147429	Adenauer, Konrad
http://www.ina.fr/thesaurus/pp/concept_10130157	Khrouchtchev, Nikita
http://www.ina.fr/thesaurus/pp/concept_10200837	Mendès France, Pierre
http://www.ina.fr/thesaurus/pp/concept_10160074	Molotov, Viatcheslav
http://www.ina.fr/thesaurus/pp/concept_10172212	Auriol, Vincent


In [9]:
# SPARQL template used by get_segments; it contains a `?person` placeholder.
# The encoding is given explicitly (as for celebrities.txt) so that decoding
# does not depend on the platform default.
# NOTE(review): this rebinds the global `query` that get_celebrity_uri also
# reads at call time.
with open('segment_by_person.rq', encoding='utf-8') as file:
    query = file.read()

In [10]:
def pad_spaces(text, width=25):
    """Right-pad ``text`` with spaces up to ``width`` characters.

    Args:
        text: string to pad.
        width: target length; defaults to 25, the column width used when
            printing per-person counts.

    Returns:
        ``text`` followed by as many spaces as needed to reach ``width``;
        ``text`` unchanged when it is already ``width`` characters or longer.
    """
    return text.ljust(width)
    
def get_segments(person, name):
    """Query the segments associated with a person and return them as a DataFrame.

    The ``?person`` placeholder of the global ``query`` template is replaced by
    ``<person>`` and the query is run through the shared ``sparql`` wrapper.
    Each result binding is flattened in place to plain values:

    * ``start`` / ``end``: the ``Thh:mm:ss...`` tail is replaced by ``hh:mm:ss``
      and the result is passed to ``convert_to_seconds_npt``;
    * ``prop``: only the last path/fragment component (after ``/`` or ``#``)
      is kept;
    * every other variable keeps its raw ``value``.

    ``person`` (the label) and ``person_uri`` are added to every row. The number
    of bindings returned by the endpoint is printed before filtering.

    Args:
        person: URI of the person, inserted between angle brackets in the query.
        name: human-readable label, used for the printed count and the
            ``person`` column.

    Returns:
        A ``pandas.DataFrame`` holding the rows whose ``start`` is strictly
        lower than ``end``.
    """
    q = query.replace('?person', '<%s>' % person)
    sparql.setQuery(q)
    results = sparql.query().convert()["results"]["bindings"]
    for r in results:
        # Only values of existing keys are reassigned here, so iterating over
        # the dict while updating it is safe.
        for p in r:
            value = r[p]['value']
            if p in ['start', 'end']:
                # Raw replacement string: "\g" is not a valid escape in a
                # regular string literal.
                value = convert_to_seconds_npt(re.sub(r"T(\d{2}:\d{2}:\d{2}).+", r"\g<1>", value))
            if p == 'prop':
                value = re.split(r"[/#]", value)[-1]
            r[p] = value
        r['person'] = name
        r['person_uri'] = person
    print('- %s\t%s'%(pad_spaces(name), len(results)))

    # Drop segments that are empty or inverted (start >= end).
    return pd.DataFrame.from_dict([r for r in results if r['start'] < r['end']])

# Fetch the segments of every celebrity and stack them into a single frame.
print('Num. results per person:')
df = pd.concat(
    list(tqdm(map(get_segments, celebrities_uri, celebrities_names),
              total=len(celebrities_names))),
    ignore_index=True,
)
df.head()

Num. results per person:


HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))

- Ben Bella, Ahmed         	17
- Gaulle, Charles de       	406
- Eisenhower, Dwight David 	88
- Elizabeth d'Angleterre   	42
- Mitterrand, François     	24
- Bidault, Georges         	110
- Pompidou, Georges        	113
- Mollet, Guy              	65
- Adenauer, Konrad         	65
- Khrouchtchev, Nikita     	65
- Mendès France, Pierre    	42
- Molotov, Viatcheslav     	38
- Auriol, Vincent          	200



Unnamed: 0,media,title,start,end,url,person,person_uri
0,http://www.ina.fr/media/AFE86003838,"A Tlemcen, Ferhat Abbas chez Ben Bella",232.0,288.0,/Media/AF/AFE86003838.mp4,"Ben Bella, Ahmed",http://www.ina.fr/thesaurus/pp/concept_10202345
1,http://www.ina.fr/media/AFE86003847,Les élections en Algérie,138.0,212.0,/Media/AF/AFE86003847.mp4,"Ben Bella, Ahmed",http://www.ina.fr/thesaurus/pp/concept_10202345
2,http://www.ina.fr/media/AFE86003893,La conférence de l' Organisation de l'unité af...,14.0,43.0,/Media/AF/AFE86003893.mp4,"Ben Bella, Ahmed",http://www.ina.fr/thesaurus/pp/concept_10202345
3,http://www.ina.fr/media/AFE86004111,"L'ami de Fidel Castro, un homme dans la révolu...",239.0,330.0,/Media/AF/AFE86004111.mp4,"Ben Bella, Ahmed",http://www.ina.fr/thesaurus/pp/concept_10202345
4,http://www.ina.fr/media/AFE86003905,L'accord Algero - Marocain... mais les combats...,231.0,311.0,/Media/AF/AFE86003905.mp4,"Ben Bella, Ahmed",http://www.ina.fr/thesaurus/pp/concept_10202345


In [11]:
# Number of segments kept (start < end) across all celebrities.
len(df)

1224

In [12]:
# SPARQL template used by get_shots; it contains a `?media` placeholder.
# The encoding is given explicitly (as for celebrities.txt) so that decoding
# does not depend on the platform default.
# NOTE(review): this rebinds the global `query` that the earlier query helpers
# also read at call time.
with open('shots_by_segment.rq', encoding='utf-8') as file:
    query = file.read()
    
def get_shots(row):
    """Return the shots of a media that lie entirely inside a segment.

    The ``?media`` placeholder of the global ``query`` template is replaced by
    ``<row['media']>`` and the query is run through the shared ``sparql``
    wrapper. Each result binding is flattened in place to plain values
    (``start`` / ``end`` go through ``convert_to_seconds_npt``, ``prop`` keeps
    only its last ``/`` or ``#`` component) and is tagged with the ``media``,
    ``person`` and ``url`` of the segment.

    Args:
        row: a segment record exposing ``media``, ``person``, ``url``,
            ``start`` and ``end`` (e.g. a row of the segments DataFrame).

    Returns:
        A ``pandas.DataFrame`` with the shots such that
        ``start <= shot.start``, ``shot.end <= end`` and the shot is not
        zero-length. The frame is empty, and a message is printed, when no
        shot qualifies.
    """
    # Read the segment bounds before the loop: they are needed for the filter
    # and for the "Empty results" message even when the query returns nothing.
    start = row['start']
    end = row['end']

    q = query.replace('?media', '<%s>' % row['media'])
    sparql.setQuery(q)
    results = sparql.query().convert()["results"]["bindings"]
    for r in results:
        # Only values of existing keys are reassigned here, so iterating over
        # the dict while updating it is safe.
        for p in r:
            value = r[p]['value']
            if p in ['start', 'end']:
                # Raw replacement string: "\g" is not a valid escape in a
                # regular string literal.
                value = convert_to_seconds_npt(re.sub(r"T(\d{2}:\d{2}:\d{2}).+", r"\g<1>", value))
            if p == 'prop':
                value = re.split(r"[/#]", value)[-1]
            r[p] = value
        r['media'] = row['media']
        r['person'] = row['person']
        r['url'] = row['url']

    # Keep only non-empty shots fully contained in the segment.
    results = [r for r in results
               if r['start'] >= start and r['end'] <= end and r['start'] != r['end']]
    if len(results) == 0:
        print('Empty results for %s (%s - %s, %s)'%( row['media'], row['person'], start, end))
    return pd.DataFrame.from_dict(results)

In [15]:
# Collect the shots of every segment and stack them into a single frame.
shots = pd.concat(
    [get_shots(segment) for _, segment in tqdm(df.iterrows(), total=len(df))],
    ignore_index=True,
)
shots.head()

HBox(children=(FloatProgress(value=0.0, max=1224.0), HTML(value='')))




Unnamed: 0,layer,segment,start,end,media,person,url
0,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,232.0,235.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4
1,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,235.0,239.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4
2,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,239.0,244.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4
3,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,244.0,246.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4
4,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,246.0,250.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4


In [16]:
# Number of shots retained across the processed segments.
len(shots)

48

In [None]:
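# Dev-only helper, kept disabled: reload the frame_collector module after editing it.
# `import frame_collector` is needed first, because only the FrameCollector class
# (not the module itself) is imported at the top of the notebook.
# import frame_collector
# from importlib import reload
# reload(frame_collector)
# FrameCollector = frame_collector.FrameCollector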

In [17]:
# Module-level state for the frame collector used by ex_frame:
# the last video location seen and the collector instance.
old_loc = ''
fc = None

# sort_values returns a new frame, so the result has to be bound for the sort
# to have any effect. A stable sort keeps the shots of one media in their
# original relative order, and rows of the same media become adjacent.
shots = shots.sort_values(by='media', ascending=False, kind='mergesort').reset_index(drop=True)

table = shots

def ex_frame(row):
    """Extract the frame located at the temporal centre of a shot.

    The video location is built from ``row['url']`` and passed through
    ``antract.apply_auth``. The module-level ``FrameCollector`` (``fc``) is
    reused as long as consecutive rows resolve to the same location and is
    rebuilt only when the location changes.

    Args:
        row: a shot record exposing ``url``, ``start`` and ``end``.

    Returns:
        Whatever ``FrameCollector.run`` returns for the single-fragment request
        (the caller stores it as the frame file of the shot).
    """
    # Without this declaration the assignment below would make ``fc`` local to
    # the call, and ``old_loc`` would never be updated: a new collector would
    # be created for every row.
    global old_loc, fc

    loc = antract.apply_auth('https://okapi.ina.fr/antract' + row['url'])
    if fc is None or loc != old_loc:
        fc = FrameCollector(loc,'antract')
        old_loc = loc

    # Middle of the shot, truncated to an integer.
    # NOTE(review): presumably seconds, since start/end come from convert_to_seconds_npt — confirm.
    center = int((row['start'] + row['end']) / 2)

    return fc.run(fragment=[center])

# Extract one frame per shot, in the row order of `table`.
filenames = [ex_frame(shot) for _, shot in tqdm(table.iterrows(), total=len(table))]

HBox(children=(FloatProgress(value=0.0, max=48.0), HTML(value='')))




In [19]:
# Attach the extracted frame of each shot (positional, one entry per row)
# and save the table without the index column.
shots['frame_file'] = filenames
shots.to_csv('shots.csv', index=False)

Unnamed: 0,layer,segment,start,end,media,person,url,frame_file
0,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,232.0,235.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,0
1,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,235.0,239.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,./frames/antract/AFE86003838.mp4_5925.jpg
2,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,239.0,244.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,./frames/antract/AFE86003838.mp4_6025.jpg
3,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,244.0,246.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,./frames/antract/AFE86003838.mp4_6125.jpg
4,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,246.0,250.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,./frames/antract/AFE86003838.mp4_6200.jpg
5,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,250.0,254.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,./frames/antract/AFE86003838.mp4_6300.jpg
6,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,254.0,256.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,0
7,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,256.0,259.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,0
8,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,259.0,261.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,./frames/antract/AFE86003838.mp4_6500.jpg
9,http://www.campus-AAR.fr/sceneLayer_AFE86003838,http://www.campus-AAR.fr/sceneSegment_AFE86003...,261.0,263.0,http://www.ina.fr/media/AFE86003838,"Ben Bella, Ahmed",/Media/AF/AFE86003838.mp4,0
