# **Installation of Packages**

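First, install the third-party packages this notebook depends on: numpy, SPARQLWrapper, scikit-learn, Pillow (imported as `PIL`), pandas, webcolors and requests.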

In [1]:
!pip3 install numpy 
!pip3 install SPARQLWrapper
!pip3 install scikit-learn
!pip3 install PIL
!pip3 install pandas
!pip3 install webcolors

Collecting PIL
[31m  Could not find a version that satisfies the requirement PIL (from versions: )[0m
[31mNo matching distribution found for PIL[0m


# **Importing Packages**

We import the necessary packages

In [2]:
import os, sys, json
import urllib 
import shutil
import numpy
from SPARQLWrapper import SPARQLWrapper, JSON
from PIL import Image
from sklearn.cluster import KMeans, MiniBatchKMeans
import webcolors
import random

# **Load Dataset**

In [3]:
# SPARQL endpoint queried by get_results().
endpoint_url = "https://query.wikidata.org/sparql"

# In-memory metadata index shared by the whole notebook:
# one entry per image, keyed by the image's identifier (file name without extension).
img_data = dict()

# Build the SPARQL query that lists the instances of a given entity (Q144 / dog by default)
def query(wd='Q144', limit=5):
    """Return the SPARQL query text selecting items that are instances of `wd` and have a picture.

    Args:
        wd: Wikidata entity id (string) used as the `wdt:P31` target.
        limit: maximum number of rows requested; converted with `str()`.

    Returns:
        The query as a single string.
    """
    fragments = (
        "SELECT ?item ?itemLabel ?pic{?item wdt:P31 wd:",
        wd,
        ".?item wdt:P18 ?pic}limit ",
        str(limit),
    )
    return "".join(fragments)

def get_results(endpoint_url, query):
    """Run a SPARQL query against an endpoint and return the converted JSON result.

    Args:
        endpoint_url: URL of the SPARQL endpoint.
        query: SPARQL query text (note: this parameter shadows the module-level
            `query()` helper inside this function).

    Returns:
        Whatever `SPARQLWrapper(...).query().convert()` yields for the JSON return format.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="WDQS-example Python/%s.%s" % (major, minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

# Create the images folder when it does not exist yet
def createFolder(name='images'):
    """Create the directory `name` unless it is already there.

    The existence check is done on the path itself (not on the listing of the
    current working directory), so nested or absolute paths work as well and
    missing parent directories are created.

    Args:
        name: path of the directory to create.

    Raises:
        FileExistsError: if `name` exists but is not a directory.
    """
    if os.path.isdir(name):
        print('Dossier '+name+' déjà exitant !')
    else:
        os.makedirs(name)

# Check whether a folder is empty - lets us avoid downloading images again for nothing.
def isEmpty(name='images'):
    """Return True when the directory `name` has no entries, False otherwise."""
    entries = os.listdir(name)
    return not entries

# Add the results of one query to the metadata index and download their pictures
def addImg(wd,limit):
    """Query the endpoint for instances of `wd` and download one picture per result row.

    For each binding, the last path segment of the item URI is used both as the
    key of a fresh (empty) entry in `img_data` and as the downloaded file's name.

    Args:
        wd: Wikidata entity id passed to `query()`.
        limit: maximum number of rows requested.
    """
    bindings = get_results(endpoint_url, query(wd, limit))['results']['bindings']
    for row in bindings:
        entity_id = row['item']['value'].rsplit("/", 1)[-1]
        img_data[entity_id] = {}
        downloadImage(row['pic']['value'], entity_id)

def addAllImg(wd,limit):
    """Call `addImg` once for every entity id in the iterable `wd`, with the same `limit`."""
    for entity in wd:
        addImg(entity, limit)

# Download the picture of one element of the dataset
def downloadImage(url, name_img='img0'):
    """Download `url` to `images/<name_img>.jpg`.

    The `.jpg` extension is always used, whatever the actual format of the
    remote file is.

    Args:
        url: address of the picture to fetch.
        name_img: file name (without extension) to store it under.
    """
    # `import urllib` alone does not guarantee that the `request` submodule is
    # loaded; import it explicitly instead of relying on another library
    # having done so as a side effect.
    import urllib.request

    full_path = 'images/'+name_img+'.jpg'
    urllib.request.urlretrieve(url, full_path)

# When the images are already downloaded, only (re)initialise the metadata index
def initAlreadyDownload(namefolder='images'):
    """Register an empty entry in `img_data` for every entry of `namefolder`.

    The key is the part of the file name located before its first dot.
    """
    img_data.update({
        filename.partition(".")[0]: {}
        for filename in os.listdir(namefolder)
    })
    
# Download every image - `limit` applies to each query separately
def downloadAllImages(wd=['Q144'], limit=5):
    """Populate the images folder and the metadata index.

    If the images folder already contains something, nothing is downloaded and
    the index is rebuilt from the folder content instead.

    Args:
        wd: list of Wikidata entity ids to query.
        limit: maximum number of results per entity.

    Returns:
        True when a download pass was performed, False when existing images were reused.
    """
    createFolder()
    already_present = not isEmpty()
    if already_present:
        print('Images déjà téléchargées !')
        initAlreadyDownload()
    else:
        addAllImg(wd, limit)
    return not already_present


# Remove an image from the images folder
def removeImage(image):
    """Delete `images/<image>` and drop its entry from `img_data`.

    Args:
        image: name of the entry inside the images folder. A regular file is
            deleted and its key (the text before the first dot) is removed from
            the index; a directory is deleted recursively; anything else is
            ignored.
    """
    path = 'images/'+image
    if os.path.isfile(path):
        os.remove(path)
        # Tolerate files that were never registered in the index.
        img_data.pop(image.split(".")[0], None)
    elif os.path.isdir(path):
        shutil.rmtree(path)
    # Neither a file nor a directory: nothing to remove. This function is called
    # from an error handler, so it must not raise on a path that is already gone.
    
# Fetch up to 5 pictures per entity for Q144 and Q14660 (nothing is downloaded when
# the images folder already has content), then show the resulting metadata index.
downloadAllImages(wd=['Q144','Q14660'],limit=5)
print(img_data)


Dossier images déjà exitant !
Images déjà téléchargées !
{'Q7809730': {}, 'Q7650148': {}, 'Q7704028': {}, 'Q7802871': {}, 'Q7660509': {}}


# **Adding Metadata to the Dataset**

## *1. Exif* 

In [4]:
# Collect the metadata of one image
def getMetaImg(name, imgfile): #name = 'img'
    """Fill `img_data[name]` with the metadata of an opened image.

    Stored keys: 'size' (width, height), 'format', 'orientation', 'couleur'
    (through `getColorsImg`) and, only when EXIF data is available, 'date'
    (tag 36867) and 'model' (tag 272).

    Args:
        name: key of the image in `img_data`; the entry is created if missing.
        imgfile: opened PIL image.
    """
    entry = img_data.setdefault(name, {})

    entry['size'] = imgfile.size  # (width, height)
    entry['format'] = imgfile.format
    entry['orientation'] = getOrientationImg(entry['size'])

    getColorsImg(name, imgfile)

    # `_getexif` is not defined by every PIL image class; calling it blindly
    # raises AttributeError for such formats, which the caller would treat as a
    # broken image. Treat "no EXIF reader" the same as "no EXIF data".
    read_exif = getattr(imgfile, '_getexif', None)
    exif_data = read_exif() if callable(read_exif) else None
    if exif_data:
        entry['date'] = exif_data.get(36867, "")  # capture date
        entry['model'] = exif_data.get(272, "")  # camera model

def getOrientationImg(size): #(width, height)
    """Classify a (width, height) pair as "landscape", "portrait" or "squared"."""
    width, height = size
    if width > height:
        return "landscape"
    if height > width:
        return "portrait"
    return "squared"
    
    


## *2. Main Color* 

In [5]:
def _css3_palette():
    """Return the CSS3 named colours as a list of (name, (r, g, b)) pairs."""
    hex_to_names = getattr(webcolors, 'CSS3_HEX_TO_NAMES', None)
    if hex_to_names is not None:
        return [(label, tuple(webcolors.hex_to_rgb(code))) for code, label in hex_to_names.items()]
    # Recent webcolors releases no longer expose the mapping constant; they
    # provide `names()` to enumerate the colour names of a specification.
    return [(label, tuple(webcolors.name_to_rgb(label))) for label in webcolors.names('css3')]


def getColorsImg(name, imgfile, cluster_nbr=3, n_init=1):
    """Store the dominant colours of an image in `img_data[name]['couleur']`.

    The pixels are clustered with MiniBatchKMeans; each cluster centre is
    truncated to integer RGB and labelled with its exact CSS3 name or, failing
    that, with the nearest CSS3 colour (squared Euclidean distance in RGB).
    Clusters that map to the same colour name share a single entry.

    Args:
        name: key of the image in `img_data` (the entry must already exist).
        imgfile: opened PIL image.
        cluster_nbr: number of dominant colours to extract.
        n_init: number of initialisations forwarded to MiniBatchKMeans.
    """
    # Force three channels: reshape(-1, 3) on an RGBA, greyscale, palette or
    # CMYK image would either fail or silently mix channels of different pixels.
    img_array = numpy.array(imgfile.convert('RGB'))
    img_vector = img_array.reshape(-1, 3)
    # Fixed random state so that every image is clustered with the same seed.
    clusters = MiniBatchKMeans(n_clusters=cluster_nbr, n_init=n_init, random_state=0).fit(img_vector)

    img_data[name]['couleur'] = {}

    palette = None  # built lazily, once, on the first colour without an exact name
    for center in clusters.cluster_centers_:
        rgb = (int(center[0]), int(center[1]), int(center[2]))

        # Look for the colour name matching the RGB code
        try:
            color_name = webcolors.rgb_to_name(rgb)
        except ValueError:
            # No exact name: fall back to the closest named colour
            if palette is None:
                palette = _css3_palette()
            color_name = min(
                palette,
                key=lambda item: sum((ref - val) ** 2 for ref, val in zip(item[1], rgb)),
            )[0]

        img_data[name]['couleur'][color_name] = rgb


## *3. All Metadata* 

In [6]:
def openImgGetMeta(name): #name = 'img.jpg'
    """Open `images/<name>` and record its metadata in `img_data`.

    Args:
        name: file name (with extension) inside the images folder.

    Returns:
        True on success. False when the file is missing, or when it could not
        be processed, in which case the file is removed from the folder.
    """
    path = "images/"+name
    try:
        # The context manager closes the image; no explicit close() is needed.
        with Image.open(path) as imgfile:
            # The metadata key is the file name without its extension. Keep
            # `name` itself untouched: the handlers below need the real file name.
            getMetaImg(name.split(".")[0], imgfile)
        return True
    except FileNotFoundError as e:
        print('Impossible de trouver le fichier {}: {}'.format(path, e))
        return False
    except Exception as e:
        print('Erreur lors de l\'ouverture de l\'image {}: {}'.format(path, e))
        removeImage(name)
        return False
        
# Collect the metadata of all our images
def openGetMetaAllImg(namefolder='images'):
    """Run `openImgGetMeta` on every entry listed in `namefolder`."""
    for filename in os.listdir(namefolder):
        openImgGetMeta(filename)

# Extract the metadata of every file currently in the images folder into img_data.
openGetMetaAllImg()

## *4. Export to JSON* 

In [7]:
# Save the image metadata to a JSON file
def exportToJSON(data,name='img_data.json'):
    """Serialise `data` as indented UTF-8 JSON into the file `name`, then print "done".

    Args:
        data: JSON-serialisable object.
        name: path of the output file (overwritten if it exists).
    """
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(name, 'w', encoding='utf-8') as handle:
        json.dump(data, handle, **dump_options)
    print("done")
        
# Persist the metadata index to the default file (img_data.json).
exportToJSON(data=img_data)

done


## *5. Open JSON* 

In [8]:

def openJSON(file="img_data.json"):
    """Load and return the content of a JSON file.

    The file is decoded as UTF-8, which is the encoding `exportToJSON` writes
    with (non-ASCII characters are stored unescaped), instead of the
    platform-dependent default encoding.

    Args:
        file: path of the JSON file to read.

    Returns:
        The deserialised JSON document.
    """
    # Read-only access; the file is closed automatically when the block exits.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data
    

## *6. Add categories* 

In [9]:
import requests
import json


def findTagsImg(image_id = "Q7704028"):
    """Return the tags derived from the category claims of a Wikidata entity.

    The claims P910 and P373 of the entity are fetched through the Wikidata
    API and their textual values are normalised with `formatCategory`.

    Args:
        image_id: Wikidata entity id.

    Returns:
        A list of tags; empty when the entity or the claims are absent.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error status.
    """
    # Wikidata API request fetching the claims of the entity
    response = requests.get(
        "https://www.wikidata.org/w/api.php",
        params={"action": "wbgetentities", "ids": image_id, "props": "claims", "format": "json"},
        timeout=30,  # never hang the notebook on an unresponsive server
    )
    response.raise_for_status()
    data = response.json()

    # An unknown id or an error payload has no usable entity/claims mapping.
    claims = data.get("entities", {}).get(image_id, {}).get("claims", {})
    if not isinstance(claims, dict):
        return []

    categories = []

    # Look for the categories in the API response
    for prop in ("P910", "P373"):
        values = []
        for claim in claims.get(prop, []):
            # "novalue"/"somevalue" snaks carry no datavalue at all.
            value = claim.get("mainsnak", {}).get("datavalue", {}).get("value")
            # Only textual values can be turned into tags: entity references
            # come back as dicts and are skipped.
            if isinstance(value, str):
                values.append(value)
        categories += formatCategory(values)

    return categories


def formatCategory(categories=None):
    """Turn category names into a flat list of lowercase one-word tags.

    Parentheses are removed and each name is split on commas and spaces.
    The first word of every category comes first (in input order), followed by
    all remaining words. Empty fragments and non-string values are skipped.
    The input list is left unmodified.

    Args:
        categories: iterable of category names (None is treated as empty).

    Returns:
        The list of tags.
    """
    heads, extras = [], []
    for category in categories or []:
        if not isinstance(category, str):
            continue
        cleaned = category.replace('(', '').replace(')', '')
        # Commas act as separators just like spaces; split() then drops the
        # empty fragments produced by sequences such as ", ".
        words = cleaned.replace(',', ' ').lower().split()
        if not words:
            continue
        heads.append(words[0])
        extras.extend(words[1:])
    return heads + extras


def addTagsJSON(image_id = "Q7704028", categories=None):
    """Attach tags to one image of the stored metadata and return the result.

    The metadata is read from img_data.json; the file itself is not rewritten,
    so pass the returned dictionary to `exportToJSON` to persist the change.

    Args:
        image_id: key of the image in the metadata file.
        categories: iterable of tags to store (None means no tag).

    Returns:
        The loaded metadata with `['tags']` set for `image_id`.

    Raises:
        KeyError: if `image_id` is not present in the metadata file.
    """
    data = openJSON(file="img_data.json")
    # Store a private copy so the entry never aliases the caller's list.
    data[image_id]['tags'] = list(categories) if categories is not None else []
    return data


def addAllTagsJSON():
    """Load img_data.json, set the 'tags' of every image via `findTagsImg`, and return the data.

    The file is not rewritten here; the caller decides whether to export the result.
    """
    metadata = openJSON(file="img_data.json")
    for image_id, entry in metadata.items():
        entry['tags'] = findTagsImg(image_id)
    return metadata

# Look up the tags of every stored image, then write the enriched metadata back to img_data.json.
data = addAllTagsJSON()
exportToJSON(data)


done


# **Data Analysis**

## *1. Class User* 

In [10]:
class User:
    """A user profile described by favourite colours, orientation, size and tags."""

    def __init__(self, favorite_color, favorite_orientation, favorite_size, favorite_tags):
        """Store the four preferences as given."""
        self._favorite_color = favorite_color
        self._favorite_orientation = favorite_orientation
        self._favorite_size = favorite_size
        self._favorite_tags = favorite_tags

    # --- readers -----------------------------------------------------------
    def get_favorite_color(self):
        """Return the favourite colour(s)."""
        return self._favorite_color

    def get_favorite_orientation(self):
        """Return the favourite orientation."""
        return self._favorite_orientation

    def get_favorite_size(self):
        """Return the favourite size."""
        return self._favorite_size

    def get_favorite_tags(self):
        """Return the favourite tags."""
        return self._favorite_tags

    # --- writers -----------------------------------------------------------
    def set_favorite_color(self, new_favorite_color):
        """Replace the favourite colour(s)."""
        self._favorite_color = new_favorite_color

    def set_favorite_orientation(self, new_favorite_orientation):
        """Replace the favourite orientation."""
        self._favorite_orientation = new_favorite_orientation

    def set_favorite_size(self, new_favorite_size):
        """Replace the favourite size."""
        self._favorite_size = new_favorite_size

    def set_favorite_tags(self, new_favorite_tags):
        """Replace the favourite tags."""
        self._favorite_tags = new_favorite_tags

    def __str__(self):
        """Return a one-line, human-readable summary of the preferences."""
        template = "User with favorite color {color}, orientation {orientation}, size {size}, and tags {tags}"
        return template.format(
            color=self._favorite_color,
            orientation=self._favorite_orientation,
            size=self._favorite_size,
            tags=self._favorite_tags,
        )
    
    
class RandomUser(User):
    """A `User` whose preferences are drawn at random from fixed candidate lists."""

    # possible values for each attribute
    colors = ["red", "blue", "green", "yellow", "purple", "orange"]
    # "squared" (not "square") is the label getOrientationImg() stores for
    # images whose width equals their height; the two must match to be comparable.
    orientations = ["portrait", "landscape", "squared"]
    sizes = ["small", "medium", "large"]
    tags = ["nature", "food", "travel", "art", "music", "sports"]

    def __init__(self):
        """Draw two distinct colours, one orientation, one size and three distinct tags."""
        # call the parent constructor with random parameters
        super().__init__(
            # sample() draws without replacement, so the same colour cannot be
            # picked twice (choices() draws with replacement).
            favorite_color=random.sample(self.colors, k=2),
            favorite_orientation=random.choice(self.orientations),
            favorite_size=random.choice(self.sizes),
            favorite_tags=random.sample(self.tags, k=3)
        )

In [11]:
# create a new random user
user1 = RandomUser()

# read the attributes of the user back through its getters
color = user1.get_favorite_color() # list of 2 randomly drawn colors
orientation = user1.get_favorite_orientation() # one randomly drawn orientation
size = user1.get_favorite_size() # one randomly drawn size
tags = user1.get_favorite_tags() # list of 3 randomly drawn tags

print(user1)

User with favorite color ['yellow', 'blue'], orientation landscape, size small, and tags ['nature', 'food', 'art']


## *2. User Analysis* 

# **Data Visualization**

# **Recommendation System**
