## Traitement de Données Massives

# Projet Partie n°1 

### Binôme : Sapy Oscar & Berthillon Mickaël

# 1 : Collecte de données

In [12]:
import requests
import os

# Pexels search endpoint.
url = "https://api.pexels.com/v1/search"

# NOTE(review): an API key committed to a notebook is effectively public and
# should be revoked. Prefer the PEXELS_API_KEY environment variable; the
# literal is kept only as a fallback so existing runs keep working.
access_key = os.environ.get(
    "PEXELS_API_KEY",
    "MgxBjp4CnSFtTvWe3SeHtkTGp5oUOBWVG5S0PYZAd37hV3T3h2yY4PHp",
)

# The API expects the key in the Authorization header.
headers = {
    "Authorization": access_key
}

# Folder that receives the downloaded pictures.
path = os.path.abspath("./images")

# Number of images to request.
num_images = 15

# Search parameters sent with the request.
query_params = {
    "query": "random",
    "per_page": num_images
}

# Seconds to wait on a stalled connection before giving up, so a hung
# download fails instead of blocking the cell indefinitely.
request_timeout = 30

# Create the images folder if it doesn't exist.
os.makedirs(path, exist_ok=True)

# Query the API and fail loudly on an HTTP error (bad key, rate limit, ...)
# instead of failing later on a missing "photos" key.
response = requests.get(
    url, headers=headers, params=query_params, timeout=request_timeout
)
response.raise_for_status()

# Iterate over what was actually returned: the API may send fewer results
# than requested, which made index-based access raise IndexError.
photos = response.json().get("photos", [])

for photo in photos[:num_images]:
    image_url = photo["src"]["original"]
    image_id = photo["id"]
    image_extension = ".jpg"  # change extension as per your requirement
    image_filename = f"{image_id}{image_extension}"
    image_path = os.path.join(path, image_filename)

    # Download the image; skip it on failure so that an HTTP error page is
    # never saved to disk as if it were a picture.
    image_response = requests.get(image_url, timeout=request_timeout)
    if not image_response.ok:
        print(f"Skipping {image_url}: HTTP {image_response.status_code}")
        continue

    with open(image_path, "wb") as f:
        f.write(image_response.content)


KeyboardInterrupt: 

# 2 : Étiquetage et annotation

In [None]:
import os
import json
from PIL import Image
from PIL.ExifTags import TAGS
from PIL.TiffImagePlugin import IFDRational

class MyEncoder(json.JSONEncoder):
    """JSON encoder able to serialise the raw values found in EXIF data.

    Adds support for two value types the stock encoder rejects:
    ``bytes`` (decoded as UTF-8 text) and ``IFDRational`` (converted to
    ``float``). Anything else is delegated to the base class.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Bytes are decoded as UTF-8; undecodable sequences are substituted
        with the Unicode replacement character rather than raising.
        """
        if isinstance(obj, bytes):
            # For valid UTF-8 this equals a strict decode, so a single
            # lenient decode covers both the normal and the fallback case.
            return obj.decode('utf-8', 'replace')

        if isinstance(obj, IFDRational):
            return float(obj)

        # Unsupported type: let the base class raise its usual TypeError.
        return super().default(obj)

# Path to the folder containing pictures
path_to_folder = os.path.abspath('./images')

# Create the metadata folder if it doesn't exist
os.makedirs('metadata', exist_ok=True)

# Loop over all pictures in the folder
for filename in os.listdir(path_to_folder):
    # Only handle the picture formats used by this project
    if not filename.endswith(('.jpg', '.jpeg', '.png')):
        continue

    # Open the image in a context manager so its file handle is released
    # as soon as the EXIF data has been copied out (the handle previously
    # stayed open for every picture in the folder).
    with Image.open(os.path.join(path_to_folder, filename)) as image:
        exifdata = image.getexif()

        # Map every EXIF tag to its readable name; tags unknown to PIL keep
        # their numeric id as the key.
        metadata = {
            TAGS.get(tag_id, tag_id): value
            for tag_id, value in exifdata.items()
        }

    # Write the metadata dictionary to a JSON file in the metadata folder
    json_filename = os.path.splitext(filename)[0] + '.json'
    with open(os.path.join('metadata', json_filename), 'w') as f:
        json.dump(metadata, f, cls=MyEncoder)


# 3 : Analyse de données

In [None]:
import os
import json
import torch
from PIL import Image
from PIL.ExifTags import TAGS
from transformers import ViTFeatureExtractor, ViTModel
import cv2
from colorthief import ColorThief
import pytesseract
from imageai.Detection import ObjectDetection

import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Absolute path of the folder holding the downloaded pictures
image_path = os.path.abspath("./images")

# Load the pretrained Faster R-CNN detector from torchvision and switch it to
# evaluation mode, since it is only used for inference in this notebook
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

# Function to get image metadata
def get_image_metadata(image_path):
    """Analyse one image file and return its descriptive metadata.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A dict with the keys:
            "orientation": value of EXIF tag 274, or "unknown" when absent.
            "dominant_color": result of ``ColorThief.get_color``.
            "color_palette": result of ``ColorThief.get_palette`` (5 colours
                requested).
            "num_faces": number of detections from the frontal-face Haar
                cascade.
            "text": text returned by Tesseract OCR.
            "objects": integer class labels of the detections whose score
                is above 0.5.

    Raises:
        ValueError: If OpenCV cannot read the file.
    """
    # Get dominant color and color palette
    color_thief = ColorThief(image_path)
    dominant_color = color_thief.get_color(quality=1)
    color_palette = color_thief.get_palette(color_count=5)

    # Open the file once, in a context manager so the handle is released.
    with Image.open(image_path) as pil_image:
        # 274 is the EXIF "Orientation" tag. The public getexif() replaces
        # the private _getexif() helper used previously.
        orientation = pil_image.getexif().get(274, "unknown")

        # RGB copy shared by OCR and object detection; it remains usable
        # after the file is closed.
        pil_image_rgb = pil_image.convert('RGB')

    # Get number of faces
    cv_image = cv2.imread(image_path)
    if cv_image is None:
        # imread signals failure by returning None; report it clearly rather
        # than letting cvtColor fail with an opaque OpenCV error.
        raise ValueError(f"OpenCV could not read image: {image_path}")
    gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=3)
    num_faces = len(faces)

    # Get text
    text = pytesseract.image_to_string(pil_image_rgb)

    # Get objects. The RGB copy is used so that RGBA, palette or greyscale
    # pictures still yield the 3-channel tensor the detector expects.
    transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])
    img = transform(pil_image_rgb).unsqueeze(0)

    # Inference only: no_grad avoids building the autograd graph.
    with torch.no_grad():
        detections = model(img)

    # Keep only confident detections
    objects = [
        label.item()
        for label, score in zip(detections[0]["labels"], detections[0]["scores"])
        if score > 0.5
    ]

    # Build metadata dictionary
    return {
        "orientation": orientation,
        "dominant_color": dominant_color,
        "color_palette": color_palette,
        "num_faces": num_faces,
        "text": text,
        "objects": objects,
    }

# Make sure the output folder exists so this cell does not fail with
# FileNotFoundError when it is run before the folder has been created.
os.makedirs("metadata", exist_ok=True)

# Loop over all pictures in the folder.
# NOTE(review): path_to_folder is defined in the labelling cell (step 2),
# which therefore has to be executed first.
for filename in os.listdir(path_to_folder):
    # Only handle the picture formats used by this project
    if not filename.endswith((".jpg", ".jpeg", ".png")):
        continue

    # Get the image metadata
    image_path = os.path.join(path_to_folder, filename)
    metadata = get_image_metadata(image_path)

    # Write the metadata dictionary to a JSON file in the metadata folder.
    # NOTE(review): this uses the same file name as the EXIF dump of step 2
    # and therefore replaces it - confirm that this is intended.
    json_path = os.path.join("metadata", os.path.splitext(filename)[0] + ".json")
    with open(json_path, "w") as f:
        json.dump(metadata, f)



# 4 : Visualisation de données

In [None]:
import json
import os

def collect_liked_images(preferences, metadata_dir="metadata"):
    """Return the liked image names that have a metadata file.

    Two layouts of the preferences mapping are supported:
      * ``{"<image file name>": True}`` - what ``ImageSelector.like_image``
        saves; the image name is the key.
      * ``{"imageN": "<image file name>"}`` - the older layout, where the
        image name is the value and an empty string means "not set".

    Args:
        preferences: Mapping loaded from the preferences file.
        metadata_dir: Folder holding the per-image metadata JSON files.

    Returns:
        List of image file names, in the iteration order of *preferences*.
    """
    liked = []
    for key, value in preferences.items():
        if not value:
            continue

        # Using the value unconditionally made os.path.splitext(True) raise
        # TypeError as soon as one image had been liked.
        image_name = value if isinstance(value, str) else key

        metadata_file = os.path.join(
            metadata_dir, os.path.splitext(image_name)[0] + ".json"
        )
        if os.path.exists(metadata_file):
            liked.append(image_name)
    return liked


def collect_preferred_tags(liked_images, metadata_dir="metadata"):
    """Return every (tag, value) pair found in the metadata of liked images.

    Args:
        liked_images: Image file names whose metadata files exist.
        metadata_dir: Folder holding the per-image metadata JSON files.

    Returns:
        List of ``(tag, value)`` tuples; duplicates are kept.
    """
    tags = []
    for image in liked_images:
        metadata_file = os.path.join(
            metadata_dir, os.path.splitext(image)[0] + ".json"
        )
        with open(metadata_file, "r") as f:
            tags.extend(json.load(f).items())
    return tags


# Read the user's preferences from the preferences.json file. A missing file
# simply means nothing has been liked yet, which matches how
# ImageSelector.read_preferences treats it.
if os.path.exists("preferences.json"):
    with open("preferences.json", "r") as f:
        preferences = json.load(f)
else:
    preferences = {}

# Liked images for which a metadata file exists
liked_images = collect_liked_images(preferences)

# (tag, value) pairs describing what the user liked so far
preferred_tags = collect_preferred_tags(liked_images)

print(preferred_tags)


[]


In [18]:
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
import os
import json

class ImageSelector(tk.Frame):
    """Tk frame that proposes images one at a time and records the likes.

    An image is proposed when its metadata shares at least one (tag, value)
    pair with the preferred tags. While no preferred tag is known yet, every
    image is proposed, since the filter could not match anything.

    Args:
        preferences_file: Path of the JSON file storing the liked images.
        image_folder: Folder containing the candidate images.
        preferred_tags: List of (tag, value) pairs describing liked images.
            The list is extended in place whenever an image is liked.
        master: Parent Tk widget; Tk's default root is used when omitted.
    """

    def __init__(self, preferences_file, image_folder, preferred_tags, master=None):
        super().__init__(master)
        # tk.Frame already stores the parent in self.master and falls back
        # to the default root when master is None. Re-assigning it here
        # replaced that fallback with None and made quit_app crash.
        self.pack()

        self.preferences_file = preferences_file
        self.image_folder = image_folder
        self.images = os.listdir(image_folder)
        self.preferences = self.read_preferences()
        self.preferred_tags = preferred_tags

        # Index of the next entry of self.images to examine; the image on
        # screen is therefore always at current_image_index - 1.
        self.current_image_index = 0
        self.image_label = tk.Label(self)
        self.image_label.pack()

        self.like_button = tk.Button(self, text="J'aime", command=self.like_image)
        self.like_button.pack()

        self.next_button = tk.Button(self, text="Suivant", command=self.show_next_image)
        self.next_button.pack()

        self.quit_button = tk.Button(self, text="Quitter", command=self.quit_app)
        self.quit_button.pack()

        self.show_next_image()

    def _metadata_path(self, image_name):
        """Return the path of the metadata JSON file describing *image_name*."""
        return os.path.join("metadata", os.path.splitext(image_name)[0] + ".json")

    def read_preferences(self):
        """Load the preferences file; return an empty dict when it is missing."""
        if not os.path.exists(self.preferences_file):
            return {}
        with open(self.preferences_file, "r") as f:
            return json.load(f)

    def save_preferences(self):
        """Write the current preferences to the preferences file as JSON."""
        with open(self.preferences_file, "w") as f:
            json.dump(self.preferences, f)

    def has_common_metadata(self, image_metadata, preferred_tags):
        """Return True if any (tag, value) pair of the image is preferred."""
        return any(
            (tag, value) in preferred_tags
            for tag, value in image_metadata.items()
        )

    def show_next_image(self):
        """Display the next matching image, or close the app if none is left."""
        while self.current_image_index < len(self.images):
            image_name = self.images[self.current_image_index]
            # Advance right away so every `continue` below moves on to the
            # next file and the index invariant documented in __init__ holds.
            self.current_image_index += 1

            # Skip files that were never analysed (or are not pictures)
            # instead of crashing on the missing JSON file.
            metadata_file = self._metadata_path(image_name)
            if not os.path.exists(metadata_file):
                continue
            with open(metadata_file, "r") as f:
                metadata = json.load(f)

            # Filter before decoding the picture. With no preferred tag yet
            # nothing can match, so every image is shown in that case rather
            # than closing the window immediately.
            if self.preferred_tags and not self.has_common_metadata(metadata, self.preferred_tags):
                continue

            try:
                # The context manager releases the file handle once the
                # thumbnail has been copied into the Tk image.
                with Image.open(os.path.join(self.image_folder, image_name)) as image:
                    image.thumbnail((400, 400))
                    photo = ImageTk.PhotoImage(image)
            except OSError:
                # Unreadable or non-image file: try the next one.
                continue

            self.image_label.config(image=photo)
            # Keep a reference on the widget so the Tk image is not
            # garbage-collected while it is displayed.
            self.image_label.image = photo
            return

        # No candidate left.
        self.quit_app()

    def like_image(self):
        """Record the displayed image as liked and learn its tags."""
        if self.current_image_index == 0:
            # Nothing has been displayed yet; index -1 would silently pick
            # the last image of the folder.
            return

        current_image = self.images[self.current_image_index - 1]
        self.preferences[current_image] = True
        self.save_preferences()

        # Update preferred_tags with the metadata of the liked image
        metadata_file = self._metadata_path(current_image)
        if not os.path.exists(metadata_file):
            return
        with open(metadata_file, "r") as f:
            metadata = json.load(f)
        self.preferred_tags.extend(metadata.items())

    def quit_app(self):
        """Close the application by destroying the parent window."""
        self.master.destroy()

# Locations used by the selector: the pictures to browse and the JSON file
# in which the likes are stored
image_folder = "images"
preferences_file = "preferences.json"

# Build the window and run the selector until the user closes it
root = tk.Tk()
app = ImageSelector(preferences_file, image_folder, preferred_tags, master=root)
app.mainloop()

# Reload the preferences from disk to show what was saved during the session
preferences = app.read_preferences()
print(preferences)


{'image1': '', 'image2': '', 'image3': '', 'image4': '', 'image5': '', 'image6': '', 'image7': '', 'image8': '', 'image9': '', 'image10': '', 'image11': '', 'image12': '', 'image13': '', '4226881.jpg': True, '4022082.jpg': True, '4386404.jpg': True, '3844788.jpg': True, '7004697.jpg': True}
