In [2]:
import requests
import pandas as pd

API_KEY = '0ba62ab60972c5886c75b0229d9a58f0'  # requests.get('localhost:3250/API_KEY')['TheMovieDB']
LANGUAGE = 'fr-FR'

In [3]:
def search_movies(API_KEY:str, query:str, language:str='en-US', page:int=1):
    return requests.get('https://api.themoviedb.org/3/search/movie?api_key=' + API_KEY + '&query=' + query + '&language=' + language + '&page=' + str(page)).json()


def get_movie_details(API_KEY:str, movie_id:int, language:str='en-US'):
    return requests.get('https://api.themoviedb.org/3/movie/' + str(movie_id) + '?api_key=' + API_KEY + '&language=' + language).json()


def get_movie_credits(API_KEY:str, movie_id:int, language:str='en-US'):
    return requests.get('https://api.themoviedb.org/3/movie/' + str(movie_id) + '/credits?api_key=' + API_KEY + '&language=' + language).json()


def get_directors(API_KEY:str, movie_id:int, language:str='en-US') -> list:
    crew = pd.DataFrame(get_movie_credits(API_KEY, movie_id, language)['crew'])
    
    try:
        return crew.loc[crew.department == 'Directing'].name.values.tolist()
    except:
        return []
    
    
def get_casting(API_KEY:str, movie_id:int, language:str='en-US') -> list:
    crew_list = []
    crew = pd.DataFrame(get_movie_credits(API_KEY, movie_id, language)['cast'])

    for idx, row in crew.iterrows():
        if row['name'] and row.character:
            crew_list.append(row['name'] + ' : ' + row.character)
        
    return crew_list
    
    
def get_genres_from_details(details) -> list:
    genres = []
    for detail in details:
        genres.append(detail['name'])
    return genres

In [18]:
def clean_query(query:str) -> str:
    return query.replace('é', 'e').replace('è', 'e').replace('ë', 'e').replace("'", ' ').replace(",", '')

def make_quick_searching(API_KEY:str, query:str, language:str='en-US', page:int=1):
    # Search a movie
    results = search_movies(API_KEY, query, language=language, page=page)
    print('Total pages : ' + str(results['total_pages']) + '\nTotal results : ' + str(results['total_results']) + '\nPrint the page ' + str(page) + ' (only 20 results max will be shown here) :\n\n')
    
    movies = []
    for result in results['results']:
        # Get the first result id
        # result['results'][0]['id']
        mov_id = result['id']

        # Get the detail and credits
        detail = get_movie_details(API_KEY, mov_id, language=language)
        credit = get_movie_credits(API_KEY, mov_id, language=language)
        movies.append({
            'Title (original title)': detail['title'] + ' (' + detail['original_title'] + ')',
            'Directors': ', '.join(get_directors(API_KEY, mov_id)),
            'Date': detail['release_date'],
            'Main genre': ', '.join(get_genres_from_details(detail['genres'])) if len(detail['genres']) else None
        })
        
    return movies
    
pd.DataFrame(make_quick_searching(API_KEY, 'voyage à tokyo', language=LANGUAGE))

Total pages : 1
Total results : 1
Print the page 1 (only 20 results max will be shown here) :




Unnamed: 0,Title (original title),Directors,Date,Main genre
0,Voyage à Tokyo (東京物語),"Shōhei Imamura, Yasujirō Ozu",1953-11-03,Drame


In [5]:
def make_detailed_searching(API_KEY:str, query:str, language:str='en-US', page:int=1, date=None):
    # Search a movie
    results = search_movies(API_KEY, query, language=language, page=page)
    
    if date:
        results_df = pd.DataFrame(results['results'])
        res = pd.DataFrame({})  # Init res
        i = 0  # Init i
        
        while len(res) < 1:
            try:
                for mov_id in results_df['id']:

                    # Get the detail and credits
                    detail = get_movie_details(API_KEY, mov_id, language=language)

                    if (pd.Timestamp(detail['release_date']) > pd.Timestamp(int(date)-i, 1, 1)) and (pd.Timestamp(detail['release_date']) < pd.Timestamp(int(date)+i, 12, 31)):
                        credit = get_movie_credits(API_KEY, mov_id, language=language)
                        return {
                            'Title (original title)': detail['title'] + ' (' + detail['original_title'] + ')',
                            'Directors': ', '.join(get_directors(API_KEY, mov_id)),
                            'Date': detail['release_date'],
                            'Main genre': ', '.join(get_genres_from_details(detail['genres'])) if len(detail['genres']) else None,
                            'Synopsis': detail['overview'],
                            'IMDB average note': detail['vote_average'],
                            'casting': get_casting(API_KEY, mov_id)
                        }
                i += 1
            except:
                res = [None]
            
    for result in results['results']:
        # Get the first result id
        # result['results'][0]['id']
        mov_id = result['id']

        # Get the detail and credits
        detail = get_movie_details(API_KEY, mov_id, language=language)
        credit = get_movie_credits(API_KEY, mov_id, language=language)

        return {
            'Title (original title)': detail['title'] + ' (' + detail['original_title'] + ')',
            'Directors': ', '.join(get_directors(API_KEY, mov_id)),
            'Date': detail['release_date'],
            'Main genre': ', '.join(get_genres_from_details(detail['genres'])) if len(detail['genres']) else None,
            'Synopsis': detail['overview'],
            'IMDB average note': detail['vote_average'],
            'casting': get_casting(API_KEY, mov_id)
        }

make_detailed_searching(API_KEY, 'voyage à tokyo', language=LANGUAGE)

{'Title (original title)': 'Voyage à Tokyo (東京物語)',
 'Directors': 'Shōhei Imamura, Yasujirō Ozu',
 'Date': '1953-11-03',
 'Main genre': 'Drame',
 'Synopsis': "Un couple de personnes âgées rend visite à leurs enfants à Tokyo. D'abord reçus avec les égards qui leur sont dûs, ils deviennent bientôt dérangeants dans leur vie quotidienne.",
 'IMDB average note': 8.3,
 'casting': ['Chishū Ryū : Shukishi Hirayama',
  'Chieko Higashiyama : Tomi Hirayama',
  'Setsuko Hara : Noriko Hirayama',
  'Haruko Sugimura : Shige Kaneko',
  'Sō Yamamura : Koichi Hirayama',
  'Kuniko Miyake : Fumiko Hirayama',
  'Kyōko Kagawa : Kyôko Hirayama',
  'Eijirō Tōno : Sanpei Numata',
  'Nobuo Nakamura : Kurazo Kaneko',
  'Shirō Ōsaka : Keiso Hirayama',
  'Hisao Toake : Osamu Hattori',
  'Teruko Nagaoka : Yone Hattori',
  'Mutsuko Sakura : Oden-ya no onna',
  'Toyo Takahashi : Rinka no saikun',
  'Tōru Abe : Tetsudou-shokuin',
  'Sachiko Mitani : Aparto no onna',
  'Ryōko Mizuki : Biyouin no kyaku',
  'Keijirô Moro

------------

## Tests

In [6]:
search_movies(API_KEY, 'voyags tok');  # Test with bad name

In [7]:
get_movie_details(API_KEY, 18148);  # Test get details

In [8]:
pd.DataFrame(make_quick_searching(API_KEY, 'Star Wars', language=LANGUAGE));  # Test get many movies

Total pages : 8
Total results : 148
Print the page 1 (only 20 results max will be shown here) :




-----

## Concrete tests

In [9]:
import os
import re
import fnmatch
import pandas as pd
import datetime
from ipywidgets import IntProgress
from IPython.display import display

PATH = "F:/Data/Cinémathèque"  #  F:\Data\Cinémathèque    D:/Cinémathèque/
PATTERNS = ["*.mkv", "*.mp4", "*.m4v", "*.avi", "*.ts"]

In [10]:
def list_lstrip(list_to_clean):
    list_cleaned = []
    for string in list_to_clean:
        list_cleaned.append(string.lstrip())
        
    return list_cleaned

In [11]:
def get_mkv_list(path="D:/Cinémathèque/", patterns=["*.mkv"]):
    file_list = []
    for root, dirs, files in os.walk(path):
        if root == path:  # To prevent subfolder analyze
            for file in files:
                for pattern in patterns:
                    if fnmatch.fnmatch(file, pattern):  # to get only with ".mkv" extension
                        file_list.append({'path': path, 'file': file[:-len(pattern)+1], 'extension': pattern[1:]})
    return pd.DataFrame(file_list)

In [12]:
def get_movie_name_and_date(file_name:str) -> str:
    regex = re.findall('(\(\d\d\d\d\))', file_name)  # Find the date
    
    try:
        name, end = file_name.split(regex[0])  # Get name and director
    except Exception as e:
        return '', ''

    return name[:-1], regex[0].replace('(', '').replace(')', '')

In [38]:
def get_movie_list_json(API_KEY:str, path:str, patterns:list, language:str='en-US', log_details=False):
    movies_list = get_mkv_list(path=path, patterns=patterns)
    movies_details = []
    
    for idx, row in movies_list.iterrows():
        movie_name, date = get_movie_name_and_date(row.file)
        
        if movie_name:
            if log_details:
                print(movie_name)
                
            movie_details = make_detailed_searching(API_KEY, movie_name, language=language, date=date)
            
            """
            if movie_details is None:
                print('--- Warning : Name cleaned for : ' + movie_name)
                movie_details = make_detailed_searching(API_KEY, clean_query(movie_name), language=language, date=date)
            """
            
            if movie_details is None:
                print('--- ERROR : No details founds for : ' + movie_name)
                movie_details = {
                    'Title (original title)': movie_name,
                    'Directors': None,
                    'Date': None,
                    'Main genre': None,
                    'Synopsis': None,
                    'IMDB average note': None,
                    'casting': None
                }
                
            movie_details.update({'stockage_path': row.path, 'stockage_name': row.file, 'stockage_extension': row.extension, 'stockage_date': date})
            movies_details.append(movie_details)
        else:
            print('--- ERROR : Movie name extraction fails for : ' + row.file)
        
    print("\n\n\n   -----   Congrat' !   -----")
    return movies_details

movies_details_list = get_movie_list_json(API_KEY, language='fr-FR', path=PATH, patterns=PATTERNS)

--- ERROR : Movie name extraction fails for : American History X Deleted Scenes
--- ERROR : No details founds for : Aruvu Rezuru, Kikaijikake no Yōseitachi
--- ERROR : Movie name extraction fails for : Conte des chrysanthemes tardifs 1939 VOSTF DVDRIP HEVC AZAZE
--- ERROR : No details founds for : L'intendant Sancho
--- ERROR : No details founds for : L'Invasion de Los Angeles
--- ERROR : No details founds for : L'Étrange Noël de mister Jack
--- ERROR : No details founds for : La cinquième victime
--- ERROR : Movie name extraction fails for : La Luna - Pixar Short
--- ERROR : No details founds for : Le Prince des ténèbres
--- ERROR : No details founds for : Le Seigneur des Anneaux I, La Communauté de l'Anneau
--- ERROR : No details founds for : Le Seigneur des Anneaux II, Les Deux Tours
--- ERROR : No details founds for : Le Seigneur des Anneaux III, Le Retour du Roi
--- ERROR : No details founds for : Les Enfants du paradis, 1re partie
--- ERROR : No details founds for : Les Enfants 

In [39]:
movies_details_df = pd.DataFrame(movies_details_list)

#movies_details_list = movies_details_list.replace(r'\\r', ' // ', regex=True)
movies_details_df = movies_details_df.replace(to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], value=[' // ',' // '], regex=True)

movies_details_df.to_csv(PATH + '/movies_details.csv', header=True, index=False, sep=';')

In [36]:
movies_details_df

Unnamed: 0,Title (original title),Directors,Date,Main genre,Synopsis,IMDB average note,casting,stockage_path,stockage_name,stockage_extension
0,(500) jours ensemble ((500) Days of Summer),"Marc Webb, Renetta G. Amador",2009-07-17,"Comédie, Drame, Romance","Tom croit encore en un amour qui transfigure, ...",7.2,"[Joseph Gordon-Levitt : Tom Hansen, Zooey Desc...",F:/Data/Cinémathèque,(500) Jours Ensemble (2009) Marc Webb,.mkv
1,1001 Pattes (A Bug's Life),"Andrew Stanton, John Lasseter, B.Z. Petroff",1998-11-25,"Aventure, Animation, Comédie, Familial","Tilt, fourmi quelque peu tête en l’air, détrui...",6.9,"[Dave Foley : Flik (voice), Kevin Spacey : Hop...",F:/Data/Cinémathèque,1001 pattes (1998) John Lasseter,.mkv
2,12 Hommes En Colère (12 Angry Men),"Sidney Lumet, Faith Hubley, Don Kranze",1957-04-10,Drame,Un jeune homme d'origine modeste est accusé du...,8.5,"[Martin Balsam : Juror 1, John Fiedler : Juror...",F:/Data/Cinémathèque,12 Hommes en Colere (1957) Sidney Lumet,.mkv
3,12 Years a Slave (12 Years a Slave),"Steve McQueen, Eva Z. Cabrera, Doug Torres",2013-10-18,"Drame, Histoire","Les États-Unis, quelques années avant la guerr...",7.9,"[Chiwetel Ejiofor : Solomon Northup, Michael F...",F:/Data/Cinémathèque,12 Years a Slave (2013) Steve McQueen,.mkv
4,120 battements par minute (120 battements par ...,Robin Campillo,2017-08-23,Drame,Début des années 90. Alors que le sida tue dep...,7.8,"[Nahuel Pérez Biscayart : Sean Dalmazo, Arnaud...",F:/Data/Cinémathèque,120 Battements par Minute (2017) Robin Campillo,.mkv
...,...,...,...,...,...,...,...,...,...,...
1259,Zootopie (Zootopia),"Byron Howard, Rich Moore, Joaquin Baldwin, Tyl...",2016-02-11,"Animation, Aventure, Familial, Comédie",Zootopia est une ville qui ne ressemble à aucu...,7.7,"[Ginnifer Goodwin : Judy Hopps (voice), Jason ...",F:/Data/Cinémathèque,"Zootopia (2016) Rich Moore, Byron Howard, Jare...",.mkv
1260,À bord du Darjeeling Limited (The Darjeeling L...,"Wes Anderson, Roman Coppola, Jennifer Furches,...",2007-09-07,"Aventure, Drame, Comédie",Trois frères qui ne se sont pas parlé depuis l...,7.2,"[Owen Wilson : Francis, Adrien Brody : Peter, ...",F:/Data/Cinémathèque,À Bord du Darjeeling Limited (2007) Wes Anderson,.mkv
1261,À bout de souffle (À bout de souffle),Jean-Luc Godard,1960-03-16,"Drame, Crime","L'itinéraire d'un jeune délinquant qui, après ...",7.7,[Jean-Paul Belmondo : Michel Poiccard / László...,F:/Data/Cinémathèque,À bout de souffle (1960) Jean-Luc Godard,.mkv
1262,À toute épreuve (辣手神探),John Woo,1992-04-16,"Action, Thriller, Crime","En 1997, alors que les Britanniques sont sur l...",7.6,"[Chow Yun-Fat : Insp. 'Tequila' Yuen, Tony Leu...",F:/Data/Cinémathèque,À toute épreuve (1992) John Woo,.mkv


---

Thibault **Santonja**  
2021