In [1]:
from bs4 import BeautifulSoup
import requests

from tqdm.autonotebook import tqdm
import pandas as pd
import numpy as np
import json

from string import punctuation
from enum import Enum
import re

from PIL import Image



In [2]:
artworks_df = pd.read_csv("data/catalogue_artworks.csv")

In [3]:
#Certains artistes ont des noms composés dans le catalogue mais pas dans les bases de données externes.
#On va donc faire les recherches avec le premier et le second nom d'auteur uniquement.

#Mais est-ce que le nom connu de l'auteur est son second ou son premier ? Est-ce que le nom de famille est suffisant
#pour tirer des conclusions ? Il faudra faire des tests avec différents patterns.

#On constate aussi que sur Wikidata, certains autheurs ne sont pas connu avec leur nom complet, comme Rembrandt.

def standardize_string(s):
    p = punctuation.replace("'","")
    
    s = str(s)
    s = s.lower()
    s = s.replace("-"," ")
    s = s.strip(" ")
    #s = s.replace("l'","@") #Ne pas oublier les d' et autres si on applique cette méthode
    s = s.translate({ord(i) : None for i in p})
    #s = s.replace("@","l'")
    return s

In [4]:
#joconde_df = pd.read_csv("data/base-joconde-extrait.csv", sep = ";")

In [5]:
#joconde_df.Titre = joconde_df.Titre.apply(standardize_string)
#joconde_df.Auteur = joconde_df.Auteur.apply(standardize_string)

artworks_df.title = artworks_df.title.apply(standardize_string)
#artworks_df.author = artworks_df.author.apply(standardize_string)
artworks_df.author = artworks_df.author.apply(lambda a : standardize_string("".join(a.split(",")[:2])).split(" "))

# Essai avec Wikidata

In [6]:
def find_property(soup,prop):
    for division in soup.findAll("div", {"data-property-id":prop}):
        for k, value in enumerate(division.\
                  findAll("div",{"class":"wikibase-snakview-value wikibase-snakview-variation-valuesnak"})):
            if k == 0:
                return value.text
                
def find_width(soup):
    return find_property(soup,"P2049")

def find_height(soup):
    return find_property(soup,"P2048")

def find_image_joconde(soup):
    joconde_id = find_property(soup,"P347")
    if joconde_id != None:
        url = "https://www.pop.culture.gouv.fr/notice/joconde/"+joconde_id

        soup_jocond = BeautifulSoup(requests.get(url).text,"html.parser")

        for image in soup_jocond.findAll("img"):
            if image["src"].startswith("http"):
                return image["src"]

def find_image(soup):
    for image in soup.findAll("meta", {"property":"og:image"}):
        return image["content"]
    
    #If image not in WikiData, tries to find joconde ref.
    return find_image_joconde(soup)


    
for index, row in tqdm(artworks_df.iterrows()):
    got_url = False
    
    url_base = "https://www.wikidata.org/w/index.php?search=&search="
    
    url = url_base+"+"+row.author[0]+"+"+row.author[-1]+"+"+row.title.replace(" ","+")
    
    #print(url)
    
    r1 = requests.get(url)
    
    soup_search = BeautifulSoup(r1.text, "html.parser")
    
    for i, li in enumerate(soup_search.findAll("li",class_ = "mw-search-result")):
        if i == 0:
            for j, a in enumerate(li.findAll("a")):
                if j == 0:
                    got_url = True
                    
                    url_painting = "https://www.wikidata.org" + a["href"]
                    r2 = requests.get(url_painting)
                    
                    #print(url_painting)
                    soup_painting = BeautifulSoup(r2.text,"html.parser")
                    
                    artworks_df.loc[index,"width"] = find_width(soup_painting)
                    
                    artworks_df.loc[index,"height"] = find_height(soup_painting)
                    
                    artworks_df.loc[index,"image_url"] = find_image(soup_painting)
                    

                

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [7]:
artworks_df.to_csv("temp/wikidata_artworks_apostrophe_joconde.csv")

Un point auquel faire attention : s'il y a plusieurs résultats, on considère actuellement le premier. On pourrait affiner la recherche en choisissant le résultat qui es encore conservé actuellement au Louvre.

In [8]:
artworks_df = pd.read_csv("temp/wikidata_artworks_apostrophe_joconde.csv")

print(len(artworks_df)) # Nombre d'oeuvres initial
print(len(artworks_df.dropna())) # Oeuvres complètement trouvées
print(len(artworks_df["image_url"].dropna())) # Image trouvée
print(len(artworks_df[["width","height"]].dropna())) # Dimensions trouvées

# Idée de Raphael : faire plusieurs recherches de moins en moins restrictives
# (avec indice de confiance de plus en plus bas) pour trouver un maximum
# d'images possibles.
# Par exemple, séparer les titres qui contiennent un "ou"

# Utiliser SPARQL sur wikidata pour trouver des résultats le plus pertinents possible (l'objet est un tableau,
# l'objet a été conservé au Louvre, etc...

#Query wikimedia commons plutôt que wikidata

#Utiliser les images du catalogue lui-même !

2254
590
620
626


In [9]:
artworks_df.sample(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,number,author,life,title,position,width,height,image_url
1334,1334,1584,1249,"['castelli', 'valerio']",(1625-1659).,le frappement du rocher,I-E,,,
1241,1241,1482,S. Nu,"['antoniazzo', 'romano', 'ant', 'aquili', 'dit']",(2TUE moitié du XVe siècle).,la vierge et l'enfant v,I tr. A-N,,,
1262,1262,1507,S. N°,"['bazzi', 'giovanni', 'antonio']",(1177-1510)-,sujet allégorique l'amour et la chasteté,Coll. Schlichting,,,
491,491,540,315,"['gellée', 'claude']",(1000-1082).,david sacré roi par samuel,XIV-S,150±1 Q174728,119±1 Q174728,https://upload.wikimedia.org/wikipedia/commons...
1550,1550,1832,1533,"['solario', 'andréa']",(vers 1465- après 1515).,tête de saint jean baptiste v,I tr. B-S,,,


# Sauvegarde finale des données

In [10]:
artworks_df.to_csv("data/complete_artworks.csv")

# Essai avec la base de donnée Joconde (abandonné)

In [6]:
joconde_df.head()

Unnamed: 0,Numéro de l'objet,Domaine,Dénomination,Appellation,Titre,Auteur,Précisions sur l'auteur,Lieu de création,Période de création,Millésime,...,ID-notice,Lieu de conservation,Identifiant Museofile,Date d'import,Date de mise à jour,Label Musée de France,Ecole,Dépôt,ville,geolocalisation_ville
0,97.1.68.(2),dessin,,,paysan assis de profil,colin paul emile,"Lunéville, 1867 ; Bourg-la-Reine, 1949",,1er quart 20e siècle,1902.0,...,05120002612,Nancy;musée des Beaux-Arts,M0512,2003-05-14,,Musée de France#au sens de la loi n°2002-5 du ...,France,,Nancy,"48.692365,6.182711"
1,97.1.116,estampe,,,femme lavant son linge,colin paul emile,"Lunéville, 1867 ; Bourg-la-Reine, 1949",,1er quart 20e siècle,1908.0,...,05120002553,Nancy;musée des Beaux-Arts,M0512,2003-05-14,,Musée de France#au sens de la loi n°2002-5 du ...,France,,Nancy,"48.692365,6.182711"
2,68.5.2,beaux-arts;dessin,,,environs de pau château de bizanos,galos victor,"GALOS : Pau, 1828 ; Pau, 1879",,3e quart 19e siècle,1861.0,...,00980000134,Pau;musée des beaux-arts,M0098,2003-05-21,2014-10-22,Musée de France#au sens de la loi n°2002-5 du ...,France,,Pau,"43.300199,-0.37085"
3,Inv 20195,peinture,tableau;fragment,,saint abbe bernard ou bruno,maitre de lassomption de la madeleine,,,1er quart 16e siècle,,...,000PE012731,Avignon;musée du Petit Palais,M0947,,2005-01-31,Musée de France#au sens de la loi n°2002-5 du ...,Italie ; Ferrare,en dépôt ; Avignon ; musée du Petit Palais,Avignon,"43.947855,4.807592"
4,00.100.2,peinture,tableau,,a capri,carmelina,,,20e siècle,,...,07660016338,Laval;musée du Vieux Château,M0766,2003-05-26,2003-06-02,Musée de France#au sens de la loi n°2002-5 du ...,,,Laval,"48.072653,-0.770364"


In [7]:
artworks_df.head(10)

Unnamed: 0.1,Unnamed: 0,number,author,life,title,position
0,0,*,"[alaux, jean]",(1786-1864').,poussin arrivant de rome est présenté par rich...,Ire S. de la Céramique antique
1,1,*,"[alaux, jean]",(1786-1864').,douze médaillons dor représentant les travaux ...,Ire S. de la Céramique antique
2,2,2,"[aligny, claude, françois, théodore, caruelle, d]",(r 798- 1871).,une villa itajienne,VIII-E
3,3,S. N°,"[amaury, duval]",(1808-1885).,portrait de mille x,Palier esc.T.T.
4,4,9,"[aved, andré, joseph]",(1702-1766).,portrait du marquis de mirabeau,XVI-S
5,5,*,"[balze, paul, jean, etienne]",(1815-1884),et balze raymond joseph antoine 1818 1909 hom...,"Musée Charles X Salle IX Baptiste, J.-B. Monno..."
6,6,2800,"[barye, antoine, louis]",(1795-1875).,lions près de leur antre,T. T.
7,7,(209),"[barye, antoine, louis]",(1795-1875).,tigre cherchant une proie aquarelle,Coll. Camondo
8,8,"S. N""","[barye, antoine, louis]",(1795-1875).,portrait dune fille de lartiste,S. Barye
9,9,"S. N""","[barye, antoine, louis]",(1795-1875).,le jean de paris forêt de fontainebleau,S. Barye


In [10]:
for row in tqdm(artworks_df.head().iterrows()):
    #Certains auteurs ont un titre en troisième position (exemple : baron). On ne garde que le nom et le prénom.
    #author = standardize_string("".join(row[1].author.split(",")[:2])).split(" ")
    #title = standardize_string(row[1].title)
    
    cond = (joconde_df.Auteur.str.contains(row[1].author[0])) & (joconde_df.Titre.apply(lambda text : text in row[1].title))
    
    print(joconde_df[cond])

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Empty DataFrame
Columns: [Numéro de l'objet, Domaine, Dénomination, Appellation, Titre, Auteur, Précisions sur l'auteur, Lieu de création, Période de création, Millésime, Période de l'original copié, Epoque, Utilisation, Période d'utilisation, Millésime d'utilisation, Dimensions, Matériaux-techniques, Géographie historique, Découverte, Sujet, Source de la représentation, Onomastique, Statut juridique, Date d'acquisition, Date de dépôt, Ancien dépôt, ID-notice, Lieu de conservation, Identifiant Museofile, Date d'import, Date de mise à jour, Label Musée de France, Ecole, Dépôt, ville, geolocalisation_ville]
Index: []

[0 rows x 36 columns]
                   Numéro de l'objet   Domaine Dénomination Appellation  \
426974  INV 493 ; F 2869 C ; LP 5861  peinture      tableau         NaN   

          Titre                    Auteur       Précisions sur l'auteur  \
426974  hercule  alaux jean le romain dit  Bordeaux, 1786 ; Paris, 1864   

                        Lieu de création  Période de

KeyboardInterrupt: 

In [None]:
#Problème des oeuvres d'art dont le titre ne match pas exactement...

In [None]:
def similarity(row):
    return (row["author"] in row["Auteur"] or row["Auteur"] in row["author"])# and\
            #(row["title"] in row["Titre"] or row["title"] in row["Titre"])

fields = ["number","author","title","position","Titre","Auteur","ID-notice"]
full_artworks_df = artworks_df.merge(joconde_df, left_on = "title", right_on = "Titre")[fields]

full_artworks_df = full_artworks_df[full_artworks_df.apply(similarity, axis = 1)]

In [None]:
full_artworks_df[full_artworks_df.author == "barye antoine louis"]

# Essai avec la base de donnée Joconde en ligne (abandonné)

In [6]:
r = requests.get("https://www.pop.culture.gouv.fr/search/list?base=%5B%22Collections%20des%20mus%C3%A9es%20de%20France%20%28Joconde%29%22%5D&mainSearch=Soleil%20couchant%20sur%20un%20marais%20jul%C3%A9s%20dupr%C3%A9")

soup = BeautifulSoup(r.text, "html.parser")

#print(soup)

for item in soup.find_all("a"):
    print(item)

<a class="jsx-3693354677 logo" href="/"><img alt="Logo" class="jsx-3693354677 md" src="/static/logo.png"/><h1 class="jsx-3693354677">Ministère de la Culture</h1></a>
<a class="jsx-3693354677 btn btn-outline-danger d-none d-sm-block" href="https://fier2.typeform.com/to/Qyz3xv" rel="noopener" target="_blank">Votre avis est utile</a>
<a class="active nav-link">LISTE</a>
<a class="nav-link">CARTE</a>
<a class="nav-link">MOSAIQUE</a>
<a class="jsx-3693354677" href="https://pop-general.s3.eu-west-3.amazonaws.com/POP_En_savoir_plus.pdf" rel="noopener" target="_blank">À propos</a>
<a class="jsx-3693354677" href="/opendata">Télécharger les bases</a>
<a class="jsx-3693354677" href="mailto:pop@culture.gouv.fr" rel="noopener" target="_blank">Nous contacter</a>
<a class="jsx-3693354677" href="/tracking">Suivi d'audience et vie privée</a>


In [7]:
print(r.text)

<!DOCTYPE html><html lang="fr"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"/><meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no"/><meta name="google-site-verification" content="AwpDhFkuFQsZzA8EKSQ6nI4OYbCkAvHKKFf4dYVdytU"/><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.2.1/css/bootstrap.min.css"/><link rel="stylesheet" type="text/css" charSet="UTF-8" href="https://cdnjs.cloudflare.com/ajax/libs/slick-carousel/1.6.0/slick.min.css"/><link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/slick-carousel/1.6.0/slick-theme.min.css"/><link rel="manifest" href="/manifest.json"/><link rel="shortcut icon" href="/favicon.ico"/><link rel="stylesheet" type="text/css" href="/static/nprogress.css"/><script src="/static/piwik.js"></script><meta name="viewport" content="width=device-width,minimum-scale=1,initia




In [10]:
base = """https://www.pop.culture.gouv.fr/search/list?base=%5B%22Collections%20des%20mus%C3%A9es%20de%20France%20%28Joconde%29%22%5D&mainSearch=%22"""
space = """%20"""

for i, row in tqdm(artworks_df.head().iterrows()):
    #Certains auteurs ont un titre en troisième position (exemple : baron). On ne garde que le nom et le prénom.
    #author = standardize_string("".join(row[1].author.split(",")[:2])).split(" ") On standardise plus haut
    #title = standardize_string(row[1].title)
    url = base + row.title.replace(" ",space) + space + row.author[-1] + space + row.author[0]
    
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    
    print(soup.find_all("a",class_ = "list-card"))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

[]
[]
[]
[]
[]



In [None]:
full_artworks_df.apply(lambda row : row["Titre"] == row["title"], axis = 1)

In [None]:
joconde_df[joconde_df["Titre"] == "soleil couchant sur un marais"]

In [None]:
artworks_df[artworks_df["title"].str.contains("soleil couchant sur un marais")]

In [None]:
joconde_df[joconde_df["Titre"] == "lions pres de leur antre"]

In [None]:
artworks_df[artworks_df["title"].str.contains("lions près de leur antre")]

In [17]:
import json
#import urllib2

url_test = "https://api.pop.culture.gouv.fr/search/merimee,palissy,memoire,joconde,mnr,museo,enluminures/_msearch"
data_test = { 
   "query":{ 
      "bool":{ 
         "must":[ 
            { 
               "bool":{ 
                  "should":[ 
                     { 
                        "multi_match":{ 
                           "query":"soleil couchant sur un marais",
                           "operator":"and",
                           "fields":[ 
                              "TICO.strict^10",
                              "AUTR.strict^10",
                              "TITRE.strict^9",
                              "TITR.strict^9",
                              "LEG.strict^9",
                              "LOCA.strict^9",
                              "AUTOEU.strict^9",
                              "AUTOR.strict^9",
                              "AUTG.strict^9",
                              "DENO.strict^8",
                              "DOMN.strict^8",
                              "EDIF.strict^8",
                              "OBJT.strict^8",
                              "REPR.strict^8",
                              "AUTP.strict^7",
                              "SERIE.strict^7",
                              "PDEN.strict^5",
                              "PERS.strict^4",
                              "PAYS.strict^3",
                              "REG.strict^3",
                              "COM.strict^3",
                              "SUJET.strict^3",
                              "HIST.strict^2",
                              "TYPE.strict^1",
                              "DATE.strict^1",
                              "EPOQ.strict^1",
                              "SCLE.strict^1",
                              "SCLD.strict^1"
                           ],
                           "boost":4
                        }
                     },
                     { 
                        "multi_match":{ 
                           "query":"soleil couchant sur un marais",
                           "operator":"and",
                           "fields":[ 
                              "TICO.strict^10",
                              "AUTR.strict^10",
                              "TITRE.strict^9",
                              "TITR.strict^9",
                              "LEG.strict^9",
                              "LOCA.strict^9",
                              "AUTOEU.strict^9",
                              "AUTOR.strict^9",
                              "AUTG.strict^9",
                              "DENO.strict^8",
                              "DOMN.strict^8",
                              "EDIF.strict^8",
                              "OBJT.strict^8",
                              "REPR.strict^8",
                              "AUTP.strict^7",
                              "SERIE.strict^7",
                              "PDEN.strict^5",
                              "PERS.strict^4",
                              "PAYS.strict^3",
                              "REG.strict^3",
                              "COM.strict^3",
                              "SUJET.strict^3",
                              "HIST.strict^2",
                              "TYPE.strict^1",
                              "DATE.strict^1",
                              "EPOQ.strict^1",
                              "SCLE.strict^1",
                              "SCLD.strict^1"
                           ],
                           "type":"cross_fields",
                           "boost":2
                        }
                     },
                     { 
                        "multi_match":{ 
                           "query":"soleil couchant sur un marais",
                           "operator":"and",
                           "fields":[ 
                              "TICO^10",
                              "AUTR^10",
                              "TITRE^9",
                              "TITR^9",
                              "LEG^9",
                              "LOCA^9",
                              "AUTOEU^9",
                              "AUTOR^9",
                              "AUTG^9",
                              "DENO^8",
                              "DOMN^8",
                              "EDIF^8",
                              "OBJT^8",
                              "REPR^8",
                              "AUTP^7",
                              "SERIE^7",
                              "PDEN^5",
                              "PERS^4",
                              "PAYS^3",
                              "REG^3",
                              "COM^3",
                              "SUJET^3",
                              "HIST^2",
                              "TYPE^1",
                              "DATE^1",
                              "EPOQ^1",
                              "SCLE^1",
                              "SCLD^1"
                           ],
                           "type":"cross_fields"
                        }
                     }
                  ]
               }
            }
         ]
      }
   },
   "size":25,
   "from":0
}
#req = urllib2.Request(url_test)
#req.add_header("Content-Type","application/json")
string = """{"preference":"res"}
{"query":{"bool":{"must":[{"bool":{"should":[{"term":{"BASE.keyword":"Collections des musÃ©es de France (Joconde)"}}]}},{"bool":{"should":[{"multi_match":{"query":"hamlet et horatio","operator":"and","fields":["TICO.strict^10","AUTR.strict^10","TITRE.strict^9","TITR.strict^9","LEG.strict^9","LOCA.strict^9","AUTOEU.strict^9","AUTOR.strict^9","AUTG.strict^9","DENO.strict^8","DOMN.strict^8","EDIF.strict^8","OBJT.strict^8","REPR.strict^8","AUTP.strict^7","SERIE.strict^7","PDEN.strict^5","PERS.strict^4","PAYS.strict^3","REG.strict^3","COM.strict^3","SUJET.strict^3","HIST.strict^2","TYPE.strict^1","DATE.strict^1","EPOQ.strict^1","SCLE.strict^1","SCLD.strict^1"],"boost":4}},{"multi_match":{"query":"hamlet et horatio","operator":"and","fields":["TICO.strict^10","AUTR.strict^10","TITRE.strict^9","TITR.strict^9","LEG.strict^9","LOCA.strict^9","AUTOEU.strict^9","AUTOR.strict^9","AUTG.strict^9","DENO.strict^8","DOMN.strict^8","EDIF.strict^8","OBJT.strict^8","REPR.strict^8","AUTP.strict^7","SERIE.strict^7","PDEN.strict^5","PERS.strict^4","PAYS.strict^3","REG.strict^3","COM.strict^3","SUJET.strict^3","HIST.strict^2","TYPE.strict^1","DATE.strict^1","EPOQ.strict^1","SCLE.strict^1","SCLD.strict^1"],"type":"cross_fields","boost":2}},{"multi_match":{"query":"hamlet et horatio","operator":"and","fields":["TICO^10","AUTR^10","TITRE^9","TITR^9","LEG^9","LOCA^9","AUTOEU^9","AUTOR^9","AUTG^9","DENO^8","DOMN^8","EDIF^8","OBJT^8","REPR^8","AUTP^7","SERIE^7","PDEN^5","PERS^4","PAYS^3","REG^3","COM^3","SUJET^3","HIST^2","TYPE^1","DATE^1","EPOQ^1","SCLE^1","SCLD^1"],"type":"cross_fields"}}]}}]}},"size":25,"from":0}
{"preference":"base"}
{"query":{"bool":{"must":[{"bool":{"should":[{"multi_match":{"query":"hamlet et horatio","operator":"and","fields":["TICO.strict^10","AUTR.strict^10","TITRE.strict^9","TITR.strict^9","LEG.strict^9","LOCA.strict^9","AUTOEU.strict^9","AUTOR.strict^9","AUTG.strict^9","DENO.strict^8","DOMN.strict^8","EDIF.strict^8","OBJT.strict^8","REPR.strict^8","AUTP.strict^7","SERIE.strict^7","PDEN.strict^5","PERS.strict^4","PAYS.strict^3","REG.strict^3","COM.strict^3","SUJET.strict^3","HIST.strict^2","TYPE.strict^1","DATE.strict^1","EPOQ.strict^1","SCLE.strict^1","SCLD.strict^1"],"boost":4}},{"multi_match":{"query":"hamlet et horatio","operator":"and","fields":["TICO.strict^10","AUTR.strict^10","TITRE.strict^9","TITR.strict^9","LEG.strict^9","LOCA.strict^9","AUTOEU.strict^9","AUTOR.strict^9","AUTG.strict^9","DENO.strict^8","DOMN.strict^8","EDIF.strict^8","OBJT.strict^8","REPR.strict^8","AUTP.strict^7","SERIE.strict^7","PDEN.strict^5","PERS.strict^4","PAYS.strict^3","REG.strict^3","COM.strict^3","SUJET.strict^3","HIST.strict^2","TYPE.strict^1","DATE.strict^1","EPOQ.strict^1","SCLE.strict^1","SCLD.strict^1"],"type":"cross_fields","boost":2}},{"multi_match":{"query":"hamlet et horatio","operator":"and","fields":["TICO^10","AUTR^10","TITRE^9","TITR^9","LEG^9","LOCA^9","AUTOEU^9","AUTOR^9","AUTG^9","DENO^8","DOMN^8","EDIF^8","OBJT^8","REPR^8","AUTP^7","SERIE^7","PDEN^5","PERS^4","PAYS^3","REG^3","COM^3","SUJET^3","HIST^2","TYPE^1","DATE^1","EPOQ^1","SCLE^1","SCLD^1"],"type":"cross_fields"}}]}}]}},"size":0,"aggs":{"BASE.keyword":{"terms":{"field":"BASE.keyword","order":{"_count":"desc"},"size":10}}}}
"""
#response = urllib2.urlopen(rqe, json.dumps(data_test))
requests.options(url = url_test, headers = {"Access-Control-Request-Method":"POST","Origin":"https://www.pop.culture.gouv.fr","User-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", "Access-Control-Request-Headers":"content-type", "Accept":"*/*"})
r = requests.post(url = url_test, data = string, headers = {"Accept":"application/json","User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36","Content-Type":"application/json"})

In [18]:
r.text

'<!DOCTYPE html>\n<html lang="en">\n<head>\n<meta charset="utf-8">\n<title>Error</title>\n</head>\n<body>\n<pre>SyntaxError: Unexpected token { in JSON at position 21<br> &nbsp; &nbsp;at JSON.parse (&lt;anonymous&gt;)<br> &nbsp; &nbsp;at parse (/var/app/current/node_modules/body-parser/lib/types/json.js:89:19)<br> &nbsp; &nbsp;at /var/app/current/node_modules/body-parser/lib/read.js:121:18<br> &nbsp; &nbsp;at invokeCallback (/var/app/current/node_modules/raw-body/index.js:224:16)<br> &nbsp; &nbsp;at done (/var/app/current/node_modules/raw-body/index.js:213:7)<br> &nbsp; &nbsp;at IncomingMessage.onEnd (/var/app/current/node_modules/raw-body/index.js:273:7)<br> &nbsp; &nbsp;at emitNone (events.js:106:13)<br> &nbsp; &nbsp;at IncomingMessage.emit (events.js:208:7)<br> &nbsp; &nbsp;at endReadableNT (_stream_readable.js:1064:12)<br> &nbsp; &nbsp;at _combinedTickCallback (internal/process/next_tick.js:138:11)<br> &nbsp; &nbsp;at process._tickDomainCallback (internal/process/next_tick.js:218:9

# Tests divers

In [None]:
" ".join(remove_punctuation("Dujardin, Jean-Xavier-David, grand duc").split(",")[:2]).split(" ")

In [None]:
remove_punctuation("".join("Dujardin, Jean-Xavier-David, grand duc".split(",")[:2])).split(" ")

In [None]:
"Soleil couchant sur un marais".replace(" ","+")

In [29]:
for match in re.finditer("a","a b c d a"):
        print(match.group())

a
a


In [45]:
list("[a,b,c]")

['[', 'a', ',', 'b', ',', 'c', ']']

In [46]:
artworks_df.count()

Unnamed: 0      2254
Unnamed: 0.1    2254
number          2254
author          2254
life            2254
title           2252
position        2254
width            488
height           490
image_url        301
wall             973
dtype: int64

In [29]:
a, b = "coucou".split(" ")

ValueError: not enough values to unpack (expected 2, got 1)

In [53]:
"- a"[2:]

'a'

In [6]:
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [18]:
"abcde"[:-1]

'abcd'

In [19]:
"ABCD".lower()

'abcd'

In [54]:
"(" in "(89)"

True

In [13]:
"a" + str(None)

'aNone'