### Projet

In [5]:
pip install SPARQLWrapper

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
from SPARQLWrapper import SPARQLWrapper, JSON

def getSPARQLQuery():
    """Query Wikidata for items with an image and return (label, image URL) pairs.

    The query selects every item matching ``wdt:P31 wd:Q144`` that also has a
    ``wdt:P18`` value, and asks the label service for labels in French ("fr").

    Returns:
        list: one ``(itemLabel, image)`` tuple of strings per result binding,
        in the order the endpoint returned them.
    """
    client = SPARQLWrapper("https://query.wikidata.org/sparql")
    client.setQuery("""
    SELECT ?item ?itemLabel ?image {
    ?item wdt:P31 wd:Q144;  
            wdt:P18 ?image.  
    SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
    }""")
    client.setReturnFormat(JSON)

    # The JSON response nests each row under results -> bindings -> <variable> -> value.
    bindings = client.query().convert()["results"]["bindings"]
    return [
        (binding["itemLabel"]["value"], binding["image"]["value"])
        for binding in bindings
    ]

# Run the query once; `array` holds the (label, image URL) tuples used to build the DataFrame.
array = getSPARQLQuery()

In [7]:
import pandas as pd

# Build the table from the (label, image URL) tuples returned by the query.
dataframe = pd.DataFrame(array, columns=["dog_name", "image"])
# Force both columns to text.
dataframe = dataframe.astype(
    dtype={"dog_name": "<U200", "image" : "<U200"}
)
# The image URLs are intentionally NOT padded (no str.ljust): padding appended
# trailing spaces to every short URL, which then ended up in the HTTP requests
# and in the names of the downloaded files.
print(dataframe)

               dog_name                                              image
0    Sallie Ann Jarrett  http://commons.wikimedia.org/wiki/Special:File...
1             Q20818039  http://commons.wikimedia.org/wiki/Special:File...
2             Q21002371  http://commons.wikimedia.org/wiki/Special:File...
3             Q21978595  http://commons.wikimedia.org/wiki/Special:File...
4                Kabosu  http://commons.wikimedia.org/wiki/Special:File...
..                  ...                                                ...
150      Chaser (chien)  http://commons.wikimedia.org/wiki/Special:File...
151            Q5101730  http://commons.wikimedia.org/wiki/Special:File...
152            Q5159487  http://commons.wikimedia.org/wiki/Special:File...
153            Q5176692  http://commons.wikimedia.org/wiki/Special:File...
154            Q5189562  http://commons.wikimedia.org/wiki/Special:File...

[155 rows x 2 columns]


In [13]:
import requests
import os
from builtins import open


# Directory that receives the downloaded images; created on first run and
# reused afterwards (exist_ok avoids a separate existence check).
output_directory = 'images'
os.makedirs(output_directory, exist_ok=True)


def download_image(image_url, output_directory, timeout=30):
    """Download one image and save it inside ``output_directory``.

    The file is named after the last path segment of the URL. Progress and
    failures are reported with ``print``; nothing is returned and no network
    or file error is propagated to the caller.

    Args:
        image_url: URL of the image. Leading/trailing whitespace is ignored.
        output_directory: Directory in which the file is written.
        timeout: Seconds to wait for the server before giving up.
    """
    # Padded URLs would otherwise put trailing spaces in the request and the file name.
    image_url = image_url.strip()
    headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'}
    print(image_url)
    try:
        # Without a timeout a stalled connection would block the whole batch.
        response = requests.get(image_url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        # Report the network failure and let the caller continue with the next URL.
        print(e)
        return

    if response.status_code == 200:
        image_filename = os.path.join(output_directory, os.path.basename(image_url))
        try :
            with open(image_filename, 'wb') as f:
                f.write(response.content)
            print(f"Image téléchargée avec succès : {image_url}")
        except OSError as e :
            # Writing can fail (invalid name, missing directory, full disk); report and go on.
            print(e)
    else:
        print(f"Échec du téléchargement. Code d'état HTTP : {response.status_code}")


# Fetch every image listed in the table, one URL at a time.
for image_url in dataframe['image']:
    download_image(image_url, output_directory)

http://commons.wikimedia.org/wiki/Special:FilePath/Sallie%20Ann%20Jarrett%2C%20Civil%20War%20mascot.jpg
Image téléchargée avec succès : http://commons.wikimedia.org/wiki/Special:FilePath/Sallie%20Ann%20Jarrett%2C%20Civil%20War%20mascot.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Tuna%20Melts%20My%20Heart.jpg                    
Image téléchargée avec succès : http://commons.wikimedia.org/wiki/Special:FilePath/Tuna%20Melts%20My%20Heart.jpg                    
http://commons.wikimedia.org/wiki/Special:FilePath/Doug%20the%20Pug%20NYC.jpg                       
Image téléchargée avec succès : http://commons.wikimedia.org/wiki/Special:FilePath/Doug%20the%20Pug%20NYC.jpg                       
http://commons.wikimedia.org/wiki/Special:FilePath/Teddy%2C%20dog%20in%20silent%20film%20comedies%20%28SAYRE%209685%29.jpg
Image téléchargée avec succès : http://commons.wikimedia.org/wiki/Special:FilePath/Teddy%2C%20dog%20in%20silent%20film%20comedies%20%28SAYRE%209685%29.jpg
http://commons