# <center> **Requesting an external API**

À partir d'une liste de titres de films, nous allons requêter l'API publique [https://www.omdbapi.com](https://www.omdbapi.com).

Nous enregistrerons les données non structurées (résumé et affiche du film) dans une base NoSQL (MongoDB).

In [66]:
%reset

## **Imports**

In [67]:
import copy
import json
import math
import os
import pprint
import re

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm
from unidecode import unidecode

# MongoDB / Pymongo
import pymongo
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Show at most 10 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 10)
# Register the tqdm integration with pandas.
tqdm.pandas()

# OMDb API key, read from the OMDB_API_KEY environment variable when it is set.
# NOTE(review): the literal fallback only keeps the notebook runnable as before.
# A key committed in a notebook should be considered leaked: rotate it and drop
# the fallback once the environment variable is configured.
api_key = os.environ.get("OMDB_API_KEY", "b8dd5759")

## **Reading the data**


In [68]:
# Movies to enrich. Other extracts previously used with this cell:
#   csv/movies_year_1982.csv
#   csv/movies_decade_80.csv
df_movies = pd.read_csv(
    'csv/movies_year_1960_to_1970.csv',
    sep=',',
    usecols=['title', 'original_title', 'summary', 'url_thumbnail'],
)
print("Nb movies :", len(df_movies))
df_movies

Nb movies : 518


Unnamed: 0,title,original_title,summary,url_thumbnail
0,L'Armée des Ombres,L'Armée des Ombres,"France 1942. Gerbier, ingénieur des Ponts et C...",https://fr.web.img4.acsta.net/c_310_420/img/23...
1,Easy Rider,Easy Rider,Billy et Captain America disposent d'une gross...,https://fr.web.img3.acsta.net/c_310_420/medias...
2,Un homme qui me plaît,Un homme qui me plaît,"Au cours d'un tournage à Los Angeles, une actr...",https://fr.web.img3.acsta.net/c_310_420/pictur...
3,Z,Z,Un député progressiste est assassiné dans un p...,https://fr.web.img6.acsta.net/c_310_420/pictur...
4,Butch Cassidy et le Kid,Butch Cassidy and the Sundance Kid,"Au début du XXe siècle, Butch Cassidy et son a...",https://fr.web.img5.acsta.net/c_310_420/medias...
...,...,...,...,...
513,L'Inconnu de Las Vegas,Ocean's 11,Gros casse à Las Vegas. Onze amis vétérans de ...,https://fr.web.img5.acsta.net/c_310_420/medias...
514,Celui par qui le scandale arrive...,Home from the Hill,Un grand propriétaire du Sud tyrannise sa femm...,https://fr.web.img2.acsta.net/c_310_420/medias...
515,Le Diabolique Docteur Mabuse,Die 1000 Augen des Dr. Mabuse,Un journaliste est tué dans sa voiture sur la ...,https://fr.web.img6.acsta.net/c_310_420/pictur...
516,Les Maîtresses de Dracula,The Brides of Dracula,Marianne a accepté un poste d’institutrice dan...,https://fr.web.img5.acsta.net/c_310_420/pictur...


In [69]:
def format_string(st):
    ''' Turn a movie title into a "+"-separated query string,
        e.g. from "title of the movie"
             to   title+of+the+movie

        Letters, digits and whitespace are kept (after transliteration with
        unidecode); every other character is replaced by a space.
        One-character words are dropped (typically the leftover of an
        apostrophe, as in "L'Armée" -> "L Armee"), unless that would leave
        nothing at all: titles such as "Z" or "8 1/2" keep their
        one-character words instead of producing an empty query.

        Arg: st string to be converted.

        Return: the formatted string ('' when st holds no letter or digit).
    '''
    cleaned = ''.join(
        unidecode(c) if (c.isdigit() or c.isalpha() or c.isspace()) else ' '
        for c in st
    )
    words = cleaned.split()
    long_words = [word for word in words if len(word) > 1]
    # Fall back to every word when the length filter removed all of them.
    return '+'.join(long_words or words)

def request_omdb_from_title(title, timeout=10):
    ''' Request the omdb API for one movie title.

        Return the decoded JSON answer as a dictionary. On any failure
        (network error, timeout, HTTP status other than 200, body that is
        not valid JSON) a message is printed and {'Response': 'False'} is
        returned, so a single bad request does not abort a whole batch.

        Args:
         - title:   string with title of the movie we want the infos about.
         - timeout: seconds to wait for the server before giving up.
    '''
    url = f"https://www.omdbapi.com/?apikey={api_key}&t={format_string(title)}"
    try:
        # Without a timeout, requests can wait forever on a stalled connection.
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"ERROR {title}, Request failed: {exc}")
        print("Request:", url)
        return {'Response': 'False'}
    if r.status_code != 200:
        print(f"ERROR {title}, Response Code: {r.status_code}")
        print("Request:", url)
        return {'Response': 'False'}
    try:
        return json.loads(r.text)
    except ValueError:
        # json.JSONDecodeError is a subclass of ValueError.
        print(f"ERROR {title}, Response is not valid JSON")
        print("Request:", url)
        return {'Response': 'False'}

# print(request_omdb_from_title("Moi, Christiane F., 13 ans, droguée, prostituée..."))
# print(request_omdb_from_title("Christiane droguée"))
# print(request_omdb_from_title("Le Père Noël est une ordure"))
# print(request_omdb_from_title("Y a-t-il un Français Dans la Salle ?"))
# print(request_omdb_from_title("Travail au noir"))

def get_plot_and_thumbail_from_omdb(title):
    ''' Return movie plot and thumbnail obtained through an API request.

        return: a single string "<plot>AND<thumbnail>" where
          - plot:      string containing the plot of the movie,
          - thumbnail: string containing the url of the thumbnail.
        Either part is empty when the API has no usable value for it
        (request failed, movie not found, field missing, '' or 'N/A').

        A missing or malformed field yields an empty part instead of
        raising, so one unexpected answer does not stop a batch of requests.

        Arg: title: string with the title of the movie.
    '''
    def _usable(value):
        # Keep only real text: the API answer uses 'N/A' for unknown fields.
        if isinstance(value, str) and value not in ('', 'N/A'):
            return value
        return ''

    res_dict = request_omdb_from_title(title)
    plot, thumbnail = '', ''
    if res_dict.get('Response') == 'True':
        plot = _usable(res_dict.get('Plot'))
        thumbnail = _usable(res_dict.get('Poster'))
    return plot + "AND" + thumbnail

## **Get the plot and the thumbnail from the OMDb API**

In [70]:
# One API request per movie; the helper returns the string "<plot>AND<thumbnail>".
omdb_raw = df_movies['original_title'].apply(get_plot_and_thumbail_from_omdb)

# Split on the FIRST "AND" only: everything after it is the thumbnail url.
# (split('AND')[1] would cut a url that itself contains the letters "AND".)
omdb_plot      = omdb_raw.apply(lambda x : x.partition('AND')[0])
omdb_thumbnail = omdb_raw.apply(lambda x : x.partition('AND')[2])

# Prefer the API values; fall back to the values read from the CSV when empty.
df_movies['plot']          = np.where(omdb_plot != '', omdb_plot, df_movies['summary'])
df_movies['url_thumbnail'] = np.where(omdb_thumbnail != '', omdb_thumbnail, df_movies['url_thumbnail'])

# Keep only the columns that are stored afterwards. This drops 'summary', so
# this cell cannot be re-run without reloading df_movies first.
df_movies = df_movies[['title', 'original_title', 'plot', 'url_thumbnail']]

## **Store the data in a NoSQL database**

In [71]:
# Open a connection to the MongoDB server running on this machine.
client = MongoClient("mongodb://localhost:27017/")

# Database "allocine": created if missing, selected otherwise.
mydb = client.allocine

# Collection "movies" (the counterpart of a table in SQL).
# To start again from scratch, empty it with col_movies.drop().
col_movies = mydb.movies

In [72]:
# Store the movies in MongoDB. Each record is upserted on its
# (title, original_title) pair, so re-running this cell updates the existing
# documents instead of inserting every movie a second time.
# NOTE(review): two rows sharing the same title and original title end up as a
# single document — confirm this is acceptable for the dataset.
movie_records = df_movies.to_dict(orient='records')
if movie_records:  # bulk_write rejects an empty list of operations
    col_movies.bulk_write(
        [
            pymongo.ReplaceOne(
                {'title': rec['title'], 'original_title': rec['original_title']},
                rec,
                upsert=True,
            )
            for rec in movie_records
        ],
        ordered=False,
    )

print(client.list_database_names())
print(mydb.list_collection_names())

# Show a few stored documents as a sanity check.
for doc in col_movies.find().limit(5):
    pprint.pprint(doc)

['Rennes', 'Rennes2', 'admin', 'allocine', 'config', 'local', 'mydatabase']
['movies']
{'_id': ObjectId('67abbeaf1e1d959e84245680'),
 'original_title': "L'Armée des Ombres",
 'plot': 'France 1942. Gerbier, ingénieur des Ponts et Chaussées est également '
         "l'un des chefs de la Résistance. Dénoncé et capturé, il est "
         "incarcéré dans un camp de prisonniers. Alors qu'il ",
 'title': "L'Armée des Ombres",
 'url_thumbnail': 'https://fr.web.img4.acsta.net/c_310_420/img/23/c1/23c1acd5c06be11bc9a64f448dae49f4.jpg'}
{'_id': ObjectId('67abbeaf1e1d959e84245681'),
 'original_title': 'Easy Rider',
 'plot': 'Two bikers head from L.A. to New Orleans through the open country '
         'and desert lands, and along the way they meet a man who bridges a '
         'counter-culture gap of which they had been unaware.',
 'title': 'Easy Rider',
 'url_thumbnail': 'https://m.media-amazon.com/images/M/MV5BMTc2MjI2NDc4Ml5BMl5BanBnXkFtZTgwODI4NzU0MTI@._V1_SX300.jpg'}
{'_id': ObjectId('67abbeaf