In [1]:
# Data Collection

!pip install kaggle --upgrade
!pip3 install Pillow

import kaggle
import os
import json
import random
from datetime import datetime
from PIL import Image
from PIL.ExifTags import TAGS



In [2]:
## You have to collect and download a set of images. You have the following tasks to program, automating the process as much as possible:

### 1. Create a folder called images.

!mkdir -p ./images
!mkdir -p ./metadata

### 2. Download open-licensed images to the folder images (minimum 100 images).

kaggle.api.authenticate()
kaggle.api.dataset_download_files('nielspace/pexels-mountain-images', path='./images', unzip=True)    
    
### 3. Save metadata of every image like image size, image format (.jpeg, .png, etc.), image orientation (landscape, portrait, square, etc.), creation date, camera model, etc. in one or more JSON files. You can make use of the Exif information present in the image files.

directory = './images/Mountain'
metadata = {}
orientation = ""

for filename in os.listdir(directory) :
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        path, file_extension = os.path.splitext(f)
        time_creation = os.path.getmtime(f)
        size = Image.open(f).size
        if size[0]>size[1] :
            orientation = "landscape"
        elif size[0] == size[1] :
            orientation = "square"
        else :
            orientation = "portrait"
        metadata[filename] = {
            "file_extension": file_extension,
            "creation_date": datetime.fromtimestamp(time_creation/1000.0).strftime("%m/%d/%Y, %H:%M:%S"),
            "size": size,
            "orientation": orientation
            }

with open("./metadata/metadata.json", "w") as outfile:
    json.dump(metadata, outfile, indent=4)

        

In [3]:
# Labeling and Annotation

directory = './images/Mountain'
data_path = "./metadata/metadata.json"

def get_colors(image_file, numcolors=3, resize=150, output = []):
    # Resize image to speed up processing
    img = Image.open(image_file)
    img = img.copy()
    img.thumbnail((resize, resize))
 
    # Reduce to palette
    paletted = img.convert('P', palette=Image.ADAPTIVE, colors=numcolors)
 
    # Find dominant colors
    palette = paletted.getpalette()
    color_counts = sorted(paletted.getcolors(), reverse=True)
    colors = list()
    for i in range(numcolors):
        palette_index = color_counts[i][1]
        dominant_color = palette[palette_index*3:palette_index*3+3]
        colors.append(tuple(dominant_color))
        output=[]
    for color in colors:
        output.append("#"+('{:X}{:X}{:X}').format(color[0], color[1], color[2]))
    return output

if os.path.isfile(data_path):
    with open(data_path) as target:
        json_data = json.load(target)
        
for filename in os.listdir(directory) :
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        tags = {}
        tags["like"] = random.randint(0,100)
        tags["colors"] = get_colors(f) 
        tags["hashtag"] = "moutain"
        json_data[filename]["tags"] = tags
        
with open("./metadata/metadata.json", "w") as outfile:
    json.dump(json_data, outfile, indent=4)

In [25]:
# Data Analyses

numb_users = 1
directory = './images/Mountain'
data_path = "./metadata/metadata.json"
rand = 0
json_data_users = {}

# Metadata for each file
if os.path.isfile(data_path):
    with open(data_path) as target:
        json_data = json.load(target)

for i in range (0, numb_users):
    images_per_users = []
    tags_per_users = []
    rand = random.randint(1, 4)
    if rand == 1:
        tags_per_users.append(["like", "colors"])
    elif rand == 2:
        tags_per_users.append("colors")
    elif rand == 3:
        tags_per_users.append(["hashtag", "colors"])
    else:
        tags_per_users.append(["like, hashtag", "colors"])
    for filename in os.listdir(directory) :
        f = os.path.join(directory, filename)
        if os.path.isfile(f):
            rand = random.randint(1, 3)
            if rand == 1 :
                images_per_users.append(filename)
    json_data_users[i] = {
        "images": images_per_users,
        "tags": tags_per_users
    }
    
# Building up the profile
for i in range(0, len(json_data_users)):
    favorite_images = json_data_users[i]["images"]
    for j in range(0, len(favorite_images)):
        image_metadata = json_data[favorite_images[j]]
         ## Favorite colors
        colors_image = image_metadata["colors"]
        ## Favorite image orientation
        orientation_image = image_metadata["orientation"]
        ## Favorite image sizes (thumbnail images, large images, medium-size images, etc.)
        size_image = image_metadata["size"]
    ## Favorite tags
    favorite_tags = json_data_users[i]["tags"]
         
json_result = {}

{'file_extension': '.jpg', 'creation_date': '01/20/1970, 02:13:58', 'size': [5440, 8160], 'orientation': 'portrait', 'tags': {'like': 7, 'colors': ['#B5BBCD', '#8A96AF', '#D7D7E0'], 'hashtag': 'moutain'}}
{'file_extension': '.jpg', 'creation_date': '01/20/1970, 02:13:58', 'size': [5440, 8160], 'orientation': 'portrait', 'tags': {'like': 7, 'colors': ['#B5BBCD', '#8A96AF', '#D7D7E0'], 'hashtag': 'moutain'}}
{'file_extension': '.jpg', 'creation_date': '01/20/1970, 02:13:58', 'size': [5440, 8160], 'orientation': 'portrait', 'tags': {'like': 7, 'colors': ['#B5BBCD', '#8A96AF', '#D7D7E0'], 'hashtag': 'moutain'}}
{'file_extension': '.jpg', 'creation_date': '01/20/1970, 02:13:58', 'size': [5440, 8160], 'orientation': 'portrait', 'tags': {'like': 7, 'colors': ['#B5BBCD', '#8A96AF', '#D7D7E0'], 'hashtag': 'moutain'}}
{'file_extension': '.jpg', 'creation_date': '01/20/1970, 02:13:58', 'size': [5440, 8160], 'orientation': 'portrait', 'tags': {'like': 7, 'colors': ['#B5BBCD', '#8A96AF', '#D7D7E0']