# <center> **Requesting an external API**

A partir d'une liste de titres de films nous allons requêter l'API publique [https://www.omdbapi.com](https://www.omdbapi.com)




In [None]:
%reset

## **Imports**

In [None]:
import math
import copy
import re
import json
import requests
import numpy as np
import pandas as pd
from tqdm import tqdm
from unidecode import unidecode

# MongoDB / Pymongo
import pymongo
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import pprint

pd.set_option('display.max_rows', 10)
tqdm.pandas()

api_key = "b8dd5759"

## **Reading the data**


In [22]:
# df_movies = pd.read_csv('csv/movies_year_1982.csv', delimiter = ',')
# df_movies = pd.read_csv('csv/movies_decade_80.csv', delimiter = ',')
df_movies = pd.read_csv('csv/movies_year_1960_to_1970.csv', delimiter = ',', usecols=['original_title', 'summary'])
print("Nb movies :", df_movies.shape[0])
df_movies

Nb movies : 518


Unnamed: 0,original_title,summary
0,L'Armée des Ombres,"France 1942. Gerbier, ingénieur des Ponts et C..."
1,Easy Rider,Billy et Captain America disposent d'une gross...
2,Un homme qui me plaît,"Au cours d'un tournage à Los Angeles, une actr..."
3,Z,Un député progressiste est assassiné dans un p...
4,Butch Cassidy and the Sundance Kid,"Au début du XXe siècle, Butch Cassidy et son a..."
...,...,...
513,Ocean's 11,Gros casse à Las Vegas. Onze amis vétérans de ...
514,Home from the Hill,Un grand propriétaire du Sud tyrannise sa femm...
515,Die 1000 Augen des Dr. Mabuse,Un journaliste est tué dans sa voiture sur la ...
516,The Brides of Dracula,Marianne a accepté un poste d’institutrice dan...


In [28]:
def format_string(st):
    ''' format string 
        from "title of the movie" 
        to title+of+the+movie

        Arg: st string to be converted.
    '''
    res = ''
    for c in st:
        if c.isdigit() or c.isalpha() or c.isspace():
            res += unidecode(c)
        else:
            res += ' '
    return '+'.join([word for word in res.split() if len(word) > 1])

def request_omdb_from_title(title):
    ''' Request the omdb API
    
        return a json dictionary with the information about the movie.

        Arg:
         - title: string with title of the movie we want the infos about.
    '''
    url = f"https://www.omdbapi.com/?apikey={api_key}&t={format_string(title)}"
    r = requests.get(url)
    if r.status_code != 200:
        print(f"ERROR {title}, Response Code: {r.status_code}")
        print("Request:", url)
        return {'Response': 'False'}
    return json.loads(r.text)

# print(request_omdb_from_title("Moi, Christiane F., 13 ans, droguée, prostituée..."))
# print(request_omdb_from_title("Christiane droguée"))
# print(request_omdb_from_title("Le Père Noël est une ordure"))
# print(request_omdb_from_title("Y a-t-il un Français Dans la Salle ?"))
# print(request_omdb_from_title("Travail au noir"))

def get_movie_plot_from_omdb(title):
    ''' return plot of a movie from its title through an API request.
        
        return: string containing the plot of the movie,
        Arg: title: string with the title of the movie.
    '''
    res_dict = request_omdb_from_title(title)
    assert 'Response' in res_dict
    if res_dict['Response'] == 'True':
        # print('res_dict', res_dict)
        if res_dict:
            assert 'Plot' in list(res_dict.keys())
            if res_dict['Plot'] != 'N/A':
                return res_dict['Plot']
    return ''

## **Get the plot from the omdb API**

In [29]:
df_test = df_movies[:4]
df_test

Unnamed: 0,original_title,summary
0,L'Armée des Ombres,"France 1942. Gerbier, ingénieur des Ponts et C..."
1,Easy Rider,Billy et Captain America disposent d'une gross...
2,Un homme qui me plaît,"Au cours d'un tournage à Los Angeles, une actr..."
3,Z,Un député progressiste est assassiné dans un p...


In [30]:
df_test = df_movies[:4]
df_test['plot'] = df_test['original_title'].apply(get_movie_plot_from_omdb)
df_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['plot'] = df_test['original_title'].apply(get_movie_plot_from_omdb)


Unnamed: 0,original_title,summary,plot
0,L'Armée des Ombres,"France 1942. Gerbier, ingénieur des Ponts et C...",
1,Easy Rider,Billy et Captain America disposent d'une gross...,Two bikers head from L.A. to New Orleans throu...
2,Un homme qui me plaît,"Au cours d'un tournage à Los Angeles, une actr...",
3,Z,Un député progressiste est assassiné dans un p...,


In [34]:
df_test['plot'] = np.where(df_test['plot'] != '', df_test['plot'], df_test['summary'])
df_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['plot'] = np.where(df_test['plot'] != '', df_test['plot'], df_test['summary'])


Unnamed: 0,original_title,summary,plot
0,L'Armée des Ombres,"France 1942. Gerbier, ingénieur des Ponts et C...","France 1942. Gerbier, ingénieur des Ponts et C..."
1,Easy Rider,Billy et Captain America disposent d'une gross...,Two bikers head from L.A. to New Orleans throu...
2,Un homme qui me plaît,"Au cours d'un tournage à Los Angeles, une actr...","Au cours d'un tournage à Los Angeles, une actr..."
3,Z,Un député progressiste est assassiné dans un p...,Un député progressiste est assassiné dans un p...


## **Store the data in a NoSQL database**

In [None]:
# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")

# Create database "allocine" (or selects it if already exists)
mydb = client["allocine"]
# client.drop_database("movies")

# Create a collection "movies" (table in SQL)
col_movies = mydb["movies"]
col_movies.drop()


In [None]:

# Insertion of texts into MongoDB database
col_movies.insert_many(df_movies.to_dict(orient='records')) # TO DO ONLY ONCE
col_movies.drop()

# Insertion of texts into MongoDB database (ALREADY DONE)
# for index, row in df_data[:20].iterrows():
#     print(index, row)
#     col_txts.insert_one({"text" : row['text'], "label" : row["label"]})

# Display some infos
# (remark: nothing is created as long as there is no data in the collection)

print(client.list_database_names())
print(mydb.list_collection_names())

for doc in list(col_movies.find().limit(5)):
    pprint.pprint(doc)