In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
from bs4 import BeautifulSoup
import requests as req
import json
# from tqdm import tqdm
from tqdm.notebook  import tqdm
import itertools
from unidecode import unidecode
import time 
import pyodbc 
# Hook tqdm into pandas (adds the `progress_apply` family of methods).
tqdm.pandas()

In [1]:
import lumieres_api as lum
import utils
import coeurimages_preprocessing as cpro

# LUMIERES API

In [2]:
# Demo of the low-level request helper: search one film from a criteria dict.
# `movie` is defined at cell level because a later cell passes it again
# as `research_params`.
movie = {
    "title": "Microcosmos",
    # The original literal had a stray TAB between first and last name
    # (spreadsheet paste artifact); a plain space is what is meant.
    "director": "Claude Nuridsany",
    "production_country": "FR",
    "include_minority_coproducing_country": True,
    "prod_start_year": 1995,
}

token = lum.get_token()
try:
    print(lum.movie_request(movie, token))
finally:
    # Log out even when the request raises, instead of skipping the logout.
    logout_status = lum.logout(token)

# Keep the logout response as the displayed cell result, as before.
logout_status

[{'id': 8813, 'imdb_id': 'tt0117040', 'original_title': "Microcosmos : le peuple de l'herbe", 'prod_year': 1996, 'production_countries': 'FR, IT, CH', 'relevance': 0.95, 'matching_title': "Microcosmos : le peuple de l'herbe", 'directors': 'Claude Nuridsany, Marie Pérennou', 'total_admissions_obs': 5876617}]


'{"status":"success"}'

In [3]:
# Fresh token: the previous cell logged its own token out.
# This token is intentionally left open here; the next cell reuses it.
token = lum.get_token()

# Explicit-argument form: every search criterion is passed individually.
# NOTE(review): False presumably means "no bound" for the unused year
# limits - confirm against lum.match_lumieres_id.
lum.match_lumieres_id(
    token,
    "Microcosmos",
    # The original literal had a stray TAB between first and last name
    # (spreadsheet paste artifact); replaced by a plain space.
    "Claude Nuridsany",
    production_country="FR",
    prod_start_year=1995,
    prod_end_year=False,
    exp_start_year=False,
    exp_end_year=False,
)

[{'id': 8813,
  'imdb_id': 'tt0117040',
  'original_title': "Microcosmos : le peuple de l'herbe",
  'prod_year': 1996,
  'production_countries': 'FR, IT, CH',
  'relevance': 0.95,
  'matching_title': "Microcosmos : le peuple de l'herbe",
  'directors': 'Claude Nuridsany, Marie Pérennou',
  'total_admissions_obs': 5876617}]

In [4]:
# Dict form of the same lookup: all criteria travel in `research_params`.
# Relies on `token` and `movie` created by the cells above.
dict_based_matches = lum.match_lumieres_id(token, research_params=movie)
dict_based_matches

[{'id': 8813,
  'imdb_id': 'tt0117040',
  'original_title': "Microcosmos : le peuple de l'herbe",
  'prod_year': 1996,
  'production_countries': 'FR, IT, CH',
  'relevance': 0.95,
  'matching_title': "Microcosmos : le peuple de l'herbe",
  'directors': 'Claude Nuridsany, Marie Pérennou',
  'total_admissions_obs': 5876617}]

In [5]:
# Admissions for Microcosmos; 8813 is the `id` returned by the match cells above.
microcosmos_id = 8813
lum.get_admissions(microcosmos_id)

[{'admissions': 22509,
  'year': 1996,
  'market': 'ES',
  'national': False,
  'region': 'Europe (CoE)'},
 {'admissions': 2001064,
  'year': 1996,
  'market': 'FR',
  'national': True,
  'region': 'Europe (CoE)'},
 {'admissions': 23554,
  'year': 1996,
  'market': 'AT',
  'national': False,
  'region': 'Europe (CoE)'},
 {'admissions': 133004,
  'year': 1996,
  'market': 'BE',
  'national': False,
  'region': 'Europe (CoE)'},
 {'admissions': 351883,
  'year': 1996,
  'market': 'CH',
  'national': False,
  'region': 'Europe (CoE)'},
 {'admissions': 419203,
  'year': 1996,
  'market': 'DE',
  'national': False,
  'region': 'Europe (CoE)'},
 {'admissions': 6500,
  'year': 1996,
  'market': 'LU',
  'national': False,
  'region': 'Europe (CoE)'},
 {'admissions': 278517,
  'year': 1996,
  'market': 'US_CA',
  'national': False,
  'region': 'North America'},
 {'admissions': 864,
  'year': 1996,
  'market': 'RO',
  'national': False,
  'region': 'Europe (CoE)'},
 {'admissions': 955,
  'year': 

In [6]:
# External identifiers for Microcosmos; 8813 is the `id` returned by the match cells above.
microcosmos_id = 8813
lum.get_external_ids(microcosmos_id)

[{'url': 'https://www.imdb.com/title/tt0117040/',
  'name': 'IMDb ID',
  'id': 'tt0117040'},
 {'url': 'https://web.isan.org/public/en/isan/0000-0001-0A9D-0000-5-0000-0000-M',
  'name': 'ISAN',
  'id': '0000-0001-0A9D-0000-5-0000-0000-M'},
 {'url': 'https://ui.eidr.org/view/content?id=10.5240/3AE3-814F-9A8D-BEA9-5965-A',
  'name': 'EIDR',
  'id': '10.5240/3AE3-814F-9A8D-BEA9-5965-A'},
 {'url': None, 'name': 'Justwatch Id', 'id': 'tm107773'},
 {'url': 'https://www.justwatch.com/it/film/microcosmos-il-popolo-dellerba',
  'name': 'JustWatch title url',
  'id': 'it/film/microcosmos-il-popolo-dellerba'},
 {'url': 'https://www.wikidata.org/wiki/Q675044',
  'name': 'Wikidata',
  'id': 'Q675044'},
 {'url': 'https://www.cbo-boxoffice.com/v4/page000.php3?Xnumitem=110&inc=fichemov.php3&fid=3327',
  'name': 'cbo-boxoffice.com',
  'id': '3327'},
 {'url': 'https://www.cnc.fr/professionnels/visas-et-classification/79747',
  'name': 'CNC (FR)',
  'id': '79747'},
 {'url': None, 'name': 'FFA (DE)', 'id':

# UTILS

In [7]:
def _build_matching_example():
    """Build the sample matching output used to demonstrate `utils.best_id`.

    The sample holds one entry per (title, director, country) search, in that
    nesting order (country varies fastest). Every search found the same film
    record; only `relevance` differs: 0.95 for the two alternative titles and
    1 for the exact original title. Each entry gets its own `resultat` dict,
    so entries share no mutable state.
    """
    titles_and_relevance = [
        ('The Hamlet Syndrome (Syndrom Hamleta)', 0.95),
        ('Le Syndrome de Hamlet', 0.95),
        ('The Hamlet Syndrome', 1),
    ]
    directors = ['Elwira Niewiera', 'Piotr Rosolowski']
    countries = ['PL', 'DE']

    return [
        {
            'recherche': {
                'title': title,
                'director': director,
                'production_country': country,
                'include_minority_coproducing_country': True,
                'prod_start_year': '2020',
            },
            'resultat': [
                {
                    'id': 336349,
                    'imdb_id': 'tt14618312',
                    'original_title': 'The Hamlet Syndrome',
                    'prod_year': 2022,
                    'production_countries': 'PL, DE',
                    'relevance': relevance,
                    'matching_title': 'The Hamlet Syndrome',
                    'directors': 'Elwira Niewiera, Piotr Rosolowski',
                    'total_admissions_obs': 2387,
                }
            ],
        }
        for title, relevance in titles_and_relevance
        for director in directors
        for country in countries
    ]


matching_result_exemple = _build_matching_example()

In [8]:
# Reduce the twelve per-search results to one record (recorded output: a single dict).
# NOTE(review): the recorded output carries relevance 0.95 although the sample
# also contains relevance-1 hits for the same id - confirm the selection rule
# in utils.best_id.
best_match = utils.best_id(matching_result_exemple)
best_match

{'id': 336349,
 'imdb_id': 'tt14618312',
 'original_title': 'The Hamlet Syndrome',
 'prod_year': 2022,
 'production_countries': 'PL, DE',
 'relevance': 0.95,
 'matching_title': 'The Hamlet Syndrome',
 'directors': 'Elwira Niewiera, Piotr Rosolowski',
 'total_admissions_obs': 2387}

In [9]:
# Build the search-parameter sets for one film.
# The inputs include a repeated title and country codes with trailing spaces;
# the recorded output shows them deduplicated and stripped.
utils.search_params(
    title=['Neco Jaco Stesti', 'Something Like Happiness', 'Something Like Happiness'],
    director=['Bohdan Slama'],
    prod_country=['CZ ', 'DE '],
    prod_year=2003,
)

{'title+director+country+year': [{'title': 'Something Like Happiness',
   'director': 'Bohdan Slama',
   'production_country': 'CZ',
   'include_minority_coproducing_country': True,
   'prod_start_year': '2003'},
  {'title': 'Something Like Happiness',
   'director': 'Bohdan Slama',
   'production_country': 'DE',
   'include_minority_coproducing_country': True,
   'prod_start_year': '2003'},
  {'title': 'Neco Jaco Stesti',
   'director': 'Bohdan Slama',
   'production_country': 'CZ',
   'include_minority_coproducing_country': True,
   'prod_start_year': '2003'},
  {'title': 'Neco Jaco Stesti',
   'director': 'Bohdan Slama',
   'production_country': 'DE',
   'include_minority_coproducing_country': True,
   'prod_start_year': '2003'}],
 'title+director+year': [{'title': 'Something Like Happiness',
   'director': 'Bohdan Slama',
   'prod_start_year': '2003'},
  {'title': 'Neco Jaco Stesti',
   'director': 'Bohdan Slama',
   'prod_start_year': '2003'}],
 'director+country+year': [{'directo

# PREPROCESSING

In [5]:
# Workbooks used by the preprocessing step (relative paths).
files_data = "data/extract_scraping.xlsx"                # scraped project extract
coproducers_data = "data/extract_scraping_coprod.xlsx"   # co-producer extract
out_file = "data/projects_to_be_matched.xlsx"            # passed as the third (output) argument

# Run the preprocessing; the returned value is displayed as the cell result.
cpro.preprocessing(files_data, coproducers_data, out_file)

Unnamed: 0,ID,Reference,refyear,OriginalTitle,FrenchTitle,EnglishTitle,CommitteDecisionDate,kind,Genre,support,...,reason,Director,key,AnnouncedAmount,CoproducerId,country1,countryname1_english,percentage_participation,contributor_rank,majmin
0,32,02-050-F,2002,Cert Vi Proc,,The Devil Knows Why,2002-04-24 00:00:00.000,Fiction,,Supported,...,,[Roman Vavra],32,,,,,,,
1,41,02-092-F,2002,True Blue (The Blue Dress),,True Blue,2002-10-02 00:00:00.000,Fiction,,Supported,...,,[Yiannis Diamantopoulos],41,"[196949.0, 787798.0]","[45, 10548]","[BG , GR ]","[Bulgaria, Greece]","[0.2, 0.8]","[2, 1]","[Minority, Majority]"
2,43,02-107-F1,2002,Mathilde,,Mathilde,2002-10-02 00:00:00.000,Fiction,,Supported,...,,[Nina Mimica],43,"[680000.0, 4080000.0, 680000.0, 1360000.0]","[81, 94, 151, 373]","[DE , IT , ES , UK ]","[Germany, Italy, Spain, United Kingdom]","[0.1, 0.6, 0.1, 0.2]","[3, 1, 3, 2]","[Minority, Majority, Minority, Minority]"
3,45,02-118-F,2002,Waiting for the Clouds,En attendant les nuages,Waiting for the Clouds,2002-11-20 00:00:00.000,Fiction,,Supported,...,,[Yesim Ustaoglu],45,,,,,,,
4,47,02-124-F,2002,Pas sur la bouche,,Not on the Lips,2002-11-20 00:00:00.000,Fiction,,Supported,...,,[Alain Resnais],47,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6066,9721,24-194-C (U),2024,La Ligne Bleue,La Ligne Bleue,The Blue Line,,Documentary,Drama,Inelegible,...,,[Marie Dumora],9721,"[329684.0, 86742.0]","[23973, 61029]","[FR , DE ]","[France, Germany]","[0.7917, 0.2083]","[1, 2]","[Majority, Minority]"
6067,9722,24-195-F,2024,Kika,Kika,Kika,,Fiction,Comedy,Inelegible,...,,[Alexe Poukine],9722,"[1565399.0, 604859.0]","[32176, 45689]","[BE , FR ]","[Belgium, France]","[0.7213, 0.2787]","[1, 2]","[Majority, Minority]"
6068,9723,24-196-F,2024,The Proud Princess (Pyšná princezna),La fière princesse,The Proud Princess,,Animation,Drama,Inelegible,...,,"[David Lisý, Radek Beran]",9723,"[311302.0, 3959178.0, 752519.0]","[29461, 37564, 61520]","[SK , CZ , PL ]","[Slovakia, Czech Republic, Poland]","[0.06198, 0.78821, 0.14981]","[3, 1, 2]","[Minority, Majority, Minority]"
6069,9724,24-197-F (U),2024,Short Summer,Un Bref Été,Short Summer,,Fiction,Drama,Inelegible,...,,[Nastia Korkia],9724,"[1030300.0, 222100.0, 397600.0]","[37969, 46676, 61039]","[DE , RS , FR ]","[Germany, Serbia, France]","[0.62442, 0.13461, 0.24097]","[1, 3, 2]","[Majority, Minority, Minority]"


# next