# <center> **Requesting an external API**

A partir d'une liste de titres de films nous allons requêter l'API publique [https://www.omdbapi.com](https://www.omdbapi.com)

Nous enregistrerons les données non-structurées (résumé et affiche du film) dans une base NoSQL (MongoDB)

In [226]:
%reset

## **Imports**

In [1]:
import math
import copy
import re
import os
import json
import requests
import numpy as np
import pandas as pd
from tqdm import tqdm
from unidecode import unidecode

# MongoDB / Pymongo
import pymongo
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import pprint
from dotenv import load_dotenv

pd.set_option('display.max_rows', 10)
tqdm.pandas()

# Environnement variables
load_dotenv()

OMDB_API_KEY = os.getenv("OMDB_API_KEY")

In [5]:
url = f"https://www.omdbapi.com/?apikey={OMDB_API_KEY}&t=in+the+lost+lands"
r = requests.get(url)
if r.status_code != 200:
    print("Error:", url)

json.loads(r.text)

{'Title': 'In the Lost Lands',
 'Year': '2025',
 'Rated': 'R',
 'Released': '07 Mar 2025',
 'Runtime': '101 min',
 'Genre': 'Action, Adventure, Fantasy',
 'Director': 'Paul W.S. Anderson',
 'Writer': 'Constantin Werner, Paul W.S. Anderson, George R.R. Martin',
 'Actors': 'Milla Jovovich, Dave Bautista, Arly Jover',
 'Plot': 'A queen sends the powerful and feared sorceress Gray Alys to the ghostly wilderness of the Lost Lands in search of a magical power, where the sorceress and her guide, the drifter Boyce, must outwit and outfight man and demon.',
 'Language': 'English',
 'Country': 'Germany, Canada, United States',
 'Awards': 'N/A',
 'Poster': 'https://m.media-amazon.com/images/M/MV5BOWYxYjEyYTUtY2FkZC00Y2QwLTk1ZjMtMTAyMTAyMzQ3MDZiXkEyXkFqcGc@._V1_SX300.jpg',
 'Ratings': [{'Source': 'Internet Movie Database', 'Value': '5.4/10'},
  {'Source': 'Metacritic', 'Value': '37/100'}],
 'Metascore': '37',
 'imdbRating': '5.4',
 'imdbVotes': '193',
 'imdbID': 'tt4419684',
 'Type': 'movie',
 'DV

## **Reading the data**


In [7]:
df_movies = pd.read_csv('csv/mongoDB_week_2025_02_12.csv', delimiter = ',', usecols=['title', 'original_title', 'plot', 'url_thumbnail'])
print("Nb movies :", df_movies.shape[0])
df_movies

Nb movies : 9


Unnamed: 0,title,original_title,plot,url_thumbnail
0,The Brutalist,The Brutalist,When a visionary architect and his wife flee p...,https://m.media-amazon.com/images/M/MV5BM2U0MW...
1,Bridget Jones : folle de lui,Bridget Jones: Mad About the Boy,Bridget Jones navigates life as a widow and si...,https://m.media-amazon.com/images/M/MV5BMDdlY2...
2,Captain America: Brave New World,Captain America: Brave New World,"Sam Wilson, the new Captain America, finds him...",https://m.media-amazon.com/images/M/MV5BNDRjY2...
3,Le Dernier souffle,Le Dernier souffle,A Montreal police officer goes to Arkansas to ...,https://m.media-amazon.com/images/M/MV5BM2RmZj...
4,Le Mohican,Le Mohican,"En plein cœur de l’été, Joseph, l’un des derni...",https://fr.web.img2.acsta.net/c_310_420/img/72...
5,Hola Frida,Hola Frida,C’est l’histoire d’une petite fille différente...,https://fr.web.img3.acsta.net/c_310_420/img/b8...
6,Prima la vita,Il tempo che si vuole,Un père et sa fille habitent les mondes de l’e...,https://fr.web.img6.acsta.net/c_310_420/img/a8...
7,Daffy et Porky sauvent le monde,The Day the Earth Blew Up: A Looney Tunes Movie,L’un des plus grands duos comiques de l’Histoi...,https://fr.web.img5.acsta.net/c_310_420/img/cc...
8,"La Vie, en gros",Zivot k sezrání,C’est la rentrée. Ben trouve que ses camarades...,https://fr.web.img6.acsta.net/c_310_420/img/a7...


In [77]:
def format_string(st):
    ''' format string 
        from "title of the movie" 
        to title+of+the+movie

        Arg: st string to be converted.
    '''
    res = ''
    for c in st:
        if c.isdigit() or c.isalpha() or c.isspace():
            res += unidecode(c)
        else:
            res += ' '
    return '+'.join([word for word in res.split() if len(word) > 1])

def request_omdb_from_title(title):
    ''' Request the omdb API
    
        return a json dictionary with the information about the movie.

        Arg:
         - title: string with title of the movie we want the infos about.
    '''
    url = f"https://www.omdbapi.com/?apikey={api_key}&t={format_string(title)}"
    r = requests.get(url)
    if r.status_code != 200:
        print(f"ERROR {title}, Response Code: {r.status_code}")
        print("Request:", url)
        return {'Response': 'False'}
    return json.loads(r.text)

def get_plot_and_thumbail_from_omdb(title):
    ''' return movie plot and thumbail through an API request.

        return: 
          - plot:      string containing the plot of the movie,
          - thumbnail: string containing the url of the thumbnail.

        Arg: title: string with the title of the movie.
    '''
    plot, thumbnail = '', ''
    res_dict = request_omdb_from_title(title)
    lst_keys = res_dict.keys()
    assert 'Response' in res_dict
    if res_dict['Response'] == 'True':
        # print('res_dict', res_dict)
        assert 'Plot' in lst_keys and 'Poster' in lst_keys
        if res_dict['Plot'] != 'N/A':
            plot = res_dict['Plot']
        if res_dict['Poster'] != '' and res_dict['Poster'] != 'N/A':
            thumbnail = res_dict['Poster']
    return plot + "AND" + thumbnail

## **Get the plot and the thumbnail from the omdb API**

In [78]:
df_movies['temp']      = df_movies['original_title'].apply(get_plot_and_thumbail_from_omdb)
df_movies['plot']      = df_movies['temp'].apply(lambda x : x.split('AND')[0])
df_movies['thumbnail'] = df_movies['temp'].apply(lambda x : x.split('AND')[1])
df_movies['plot']          = np.where(df_movies['plot'] != '', df_movies['plot'], df_movies['summary'])
df_movies['url_thumbnail'] = np.where(df_movies['thumbnail'] != '', df_movies['thumbnail'], df_movies['url_thumbnail'])
df_movies = df_movies[['title', 'original_title', 'plot', 'url_thumbnail']]

In [8]:
df_movies

Unnamed: 0,title,original_title,plot,url_thumbnail
0,The Brutalist,The Brutalist,When a visionary architect and his wife flee p...,https://m.media-amazon.com/images/M/MV5BM2U0MW...
1,Bridget Jones : folle de lui,Bridget Jones: Mad About the Boy,Bridget Jones navigates life as a widow and si...,https://m.media-amazon.com/images/M/MV5BMDdlY2...
2,Captain America: Brave New World,Captain America: Brave New World,"Sam Wilson, the new Captain America, finds him...",https://m.media-amazon.com/images/M/MV5BNDRjY2...
3,Le Dernier souffle,Le Dernier souffle,A Montreal police officer goes to Arkansas to ...,https://m.media-amazon.com/images/M/MV5BM2RmZj...
4,Le Mohican,Le Mohican,"En plein cœur de l’été, Joseph, l’un des derni...",https://fr.web.img2.acsta.net/c_310_420/img/72...
5,Hola Frida,Hola Frida,C’est l’histoire d’une petite fille différente...,https://fr.web.img3.acsta.net/c_310_420/img/b8...
6,Prima la vita,Il tempo che si vuole,Un père et sa fille habitent les mondes de l’e...,https://fr.web.img6.acsta.net/c_310_420/img/a8...
7,Daffy et Porky sauvent le monde,The Day the Earth Blew Up: A Looney Tunes Movie,L’un des plus grands duos comiques de l’Histoi...,https://fr.web.img5.acsta.net/c_310_420/img/cc...
8,"La Vie, en gros",Zivot k sezrání,C’est la rentrée. Ben trouve que ses camarades...,https://fr.web.img6.acsta.net/c_310_420/img/a7...


## **Store the data in a NoSQL database**

In [9]:
# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")

# Create database "allocine" (or selects it if already exists)
mydb = client["allocine"]
# client.drop_database("movies")

# Create a collection "movies" (table in SQL)
col_movies = mydb["movies"]
# col_movies.drop()

In [10]:
# Insertion of movie plots in MongoDB database
col_movies.insert_many(df_movies.to_dict(orient='records')) # TO DO ONLY ONCE
# col_movies.drop()

print(client.list_database_names())
print(mydb.list_collection_names())

# for doc in list(col_movies.find().limit(5)):
#     pprint.pprint(doc)

print("Nb documents:", col_movies.count_documents({}))

['Rennes', 'Rennes2', 'admin', 'allocine', 'config', 'local', 'mydatabase', 'test_queries']
['movies']
Nb documents: 8832


## **Checking what is in the Mongo DB**

In [83]:
names = ['csv/movies_year_1960_to_1970.csv',
         'csv/movies_year_1970_to_1980.csv',
         'csv/movies_year_1980_to_1990.csv',
         'csv/movies_year_1990_to_1995.csv',
         'csv/movies_year_1995_to_2000.csv',
         'csv/movies_year_2000_to_2003.csv',
         'csv/movies_year_2003_to_2006.csv',
         'csv/movies_year_2006_to_2010.csv',
         'csv/movies_year_2010_to_2015.csv',
         'csv/movies_year_2015_to_2019.csv',
         'csv/movies_year_2019_to_2022.csv',
         'csv/movies_year_2022_to_2025.csv',
         'csv/movies_year_2025_week_1_to_5.csv',
         ]

lst_df = [pd.read_csv(name, delimiter = ',', usecols=['title', 'original_title', 'summary', 'url_thumbnail'])
          for name in names]

for df in lst_df:
    print(df.shape[0])

df_csv = pd.concat(lst_df)
print("Nb total de films en base SQL", df_csv.shape[0])
# df_csv.to_csv('csv/mongoDB_all_years.csv', sep=',', index = False)

# Reading data from Mongo DB
client = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = client["allocine"]
col_movies = mydb["movies"]
print("Nb documents MongoDB:", col_movies.count_documents({}))

# 1960 to 2010 stored in Mongo DB
518 + 678 + 869 + 585 + 807 + 729 + 980 + 600 + 750 + 800 + 750 + 750 + 24

518
678
869
585
807
729
980
600
750
800
750
750
24
Nb total de films en base SQL 8840
Nb documents MongoDB: 8823


8840

In [84]:
# Store data in csv file
lst = list(col_movies.find())
df_mongo = pd.DataFrame(lst, columns = ['_id', 'title', 'original_title', 'plot', 'url_thumbnail'])
print(df_mongo.shape[0])
df_mongo.to_csv('csv/mongoDB_1960_to_2025_week_5.csv', sep=',', index = False)

8823


**Since we added twice the same 807 movies, we have to remove them from MongoDB**

In [None]:
client = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = client["allocine"]
col_movies = mydb["movies"]

print("Nb documents:", col_movies.count_documents({}))
col_movies.delete_many({'original_title' : {'$in' : df_movies['original_title'].values.tolist()} })
print("Nb documents:", col_movies.count_documents({}))

Nb documents: 4264
Nb documents: 2639


**Looks like more documents have been deleted**<br>
So we will drop the whole data base and use the csv 'mongoDB.csv' to restore the previous database and re-add the movies needed.

In [None]:
client = pymongo.MongoClient("mongodb://localhost:27017/")
client.drop_database('allocine')

# Recreate DB
client = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = client["allocine"]
col_movies = mydb["movies"]

## **Query MongoDB Using PyMongo**

### **We create another DB**

In [184]:
client2 = pymongo.MongoClient("mongodb://localhost:27017/")
mydb2 = client2["test_queries"]
# client.drop_database("test_queries")

# Create a collection "movies" (table in SQL)
col_fruits = mydb2["fruits"]
# col_fruits.drop()

col_fruits.insert_many([
        # { "_id": 1, "name": "apples", "qty": 5, "rating": 3, "color": "red", "type": ["fuji", "honeycrisp"] },
        # { "_id": 2, "name": "bananas", "qty": 7, "rating": 4, "color": "yellow", "type": ["cavendish"] },
        { "_id": 3, "name": "oranges", "qty": 6, "rating": 2, "type": ["naval", "mandarin"] },
        { "_id": 4, "name": "pineapple", "qty": 3, "rating": 5, "color": "yellow" },
        { "_id": 5, "name": "test", "qty": 10, "rating": 3, "color": "green", "type": ["fuji", "typeXXX"] },
    ])
# col_fruits.drop()

print(client2.list_database_names())
print(mydb2.list_collection_names())

['Rennes', 'Rennes2', 'admin', 'allocine', 'config', 'local', 'mydatabase', 'test_queries']
['fruits']


**Sources**<br>
https://www.mongodb.com/docs/languages/python/pymongo-driver/current/read/specify-a-query/<br>
https://www.w3resource.com/mongodb/introduction-mongodb.php<br>
https://www.mongodbtutorial.org/mongodb-crud/mongodb-findone/<br>
https://geekflare.com/fr/mongodb-queries-examples/<br>


**Exact Match**

In [175]:
results = col_movies.find({ "title": "Eyes Wide Shut" })
results.to_list()

[{'_id': ObjectId('67b1ae3a1e1d959e842460ed'),
  'title': 'Eyes Wide Shut',
  'original_title': 'Eyes Wide Shut',
  'plot': "A Manhattan doctor embarks on a bizarre, night-long odyssey after his wife's admission of unfulfilled longing.",
  'url_thumbnail': 'https://m.media-amazon.com/images/M/MV5BZTQ0MmM5MDAtYmYyZS00MzlmLTlhZTAtZDJlZWY5ZTZkZjZmXkEyXkFqcGc@._V1_SX300.jpg'},
 {'_id': ObjectId('67b1ae4a1e1d959e84246414'),
  'title': 'Eyes Wide Shut',
  'original_title': 'Eyes Wide Shut',
  'plot': "A Manhattan doctor embarks on a bizarre, night-long odyssey after his wife's admission of unfulfilled longing.",
  'url_thumbnail': 'https://m.media-amazon.com/images/M/MV5BZTQ0MmM5MDAtYmYyZS00MzlmLTlhZTAtZDJlZWY5ZTZkZjZmXkEyXkFqcGc@._V1_SX300.jpg'}]

In [144]:
results = col_fruits.find({ "color": "yellow" })
results.to_list()

[{'_id': 2,
  'name': 'bananas',
  'qty': 7,
  'rating': 4,
  'color': 'yellow',
  'type': ['cavendish']},
 {'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}]

**Partial Match** ???

Avec des index

https://www.mongodb.com/docs/manual/reference/operator/query/text/

In [62]:
client = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = client["allocine"]

col_movies = mydb["movies"]
results = col_movies.find({ "title": "The Brutalist" })
results.to_list()

[]

In [61]:
col_movies.delete_one({'title' : "The Brutalist"})

DeleteResult({'n': 1, 'ok': 1.0}, acknowledged=True)

**Comparison operators**

list of operators: https://www.mongodb.com/docs/manual/reference/operator/query-comparison/

In [143]:
results = col_fruits.find({ "rating": { "$gt" : 2 }})
for f in results:
    print(f) 

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}


**Logical operators**

https://www.mongodb.com/docs/manual/reference/operator/query-logical/

In [147]:
results = col_fruits.find({ 
    "$or": [
        { "qty": { "$gt": 5 }},
        { "color": "yellow" }
    ]
})
for f in results:
    print(f)

{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}


**Arrays operators**

https://www.mongodb.com/docs/manual/reference/operator/query-array/

In [158]:
for f in col_fruits.find({}):
    print(f)

print('----')
results = col_fruits.find({
    "type" : { "$size": 2 }
})

for f in results:
    print(f)

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}
----
{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}


In [159]:
results = col_fruits.find({
    "type" : { "$all": ['fuji'] }
})

for f in results:
    print(f)

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}


**Element Operator**

to check if a field exists

In [160]:
results = col_fruits.find( { "color" : { "$exists": "true" }} )
for f in results:
    print(f)

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}


**Evaluation operators**

https://www.mongodb.com/docs/manual/reference/operator/query-evaluation/

In [None]:
results = col_fruits.find({ "name" : { "$regex" : "p{2,}" }} ) ## At least 2 consecutives p
for f in results:
    print(f)

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}


**Delete data**

In [None]:
# Delete_one
for f in col_fruits.find({}):
    print(f)

col_fruits.delete_one({'_id' : 4})

for f in col_fruits.find({}):
    print(f)

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}
{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}


In [None]:
# Delete_many
for f in col_fruits.find({}):
    print(f)

col_fruits.delete_many({'_id' : {'$gte' : 3} })

for f in col_fruits.find({}):
    print(f)

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}
{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}


In [186]:
# Delete_many
for f in col_fruits.find({}):
    print(f)

col_fruits.delete_many({'_id' : {'$in' : [1, 3, 5]} })

for f in col_fruits.find({}):
    print(f)

{'_id': 1, 'name': 'apples', 'qty': 5, 'rating': 3, 'color': 'red', 'type': ['fuji', 'honeycrisp']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 3, 'name': 'oranges', 'qty': 6, 'rating': 2, 'type': ['naval', 'mandarin']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}
{'_id': 5, 'name': 'test', 'qty': 10, 'rating': 3, 'color': 'green', 'type': ['fuji', 'typeXXX']}
{'_id': 2, 'name': 'bananas', 'qty': 7, 'rating': 4, 'color': 'yellow', 'type': ['cavendish']}
{'_id': 4, 'name': 'pineapple', 'qty': 3, 'rating': 5, 'color': 'yellow'}


In [2]:
import pymongo
client = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = client["movies"]
col_movies = mydb["movies"]

In [5]:
test = col_movies.find_one({ "title" : "Eyes Wide Shut" })
print(test)

None
