In [1]:
# Data Collection

!pip install kaggle --upgrade
!pip3 install Pillow

import kaggle
import os
import json
import random
from datetime import datetime
from PIL import Image, ImageStat
from PIL.ExifTags import TAGS

def get_predominant_colors(image_file, numcolors=3, resize=150, output = []):
    # Resize image to speed up processing
    img = Image.open(image_file)
    img = img.copy()
    img.thumbnail((resize, resize))
 
    # Reduce to palette
    paletted = img.convert('P', palette=Image.ADAPTIVE, colors=numcolors)
 
    # Find dominant colors
    palette = paletted.getpalette()
    color_counts = sorted(paletted.getcolors(), reverse=True)
    colors = list()
    for i in range(numcolors):
        palette_index = color_counts[i][1]
        dominant_color = palette[palette_index*3:palette_index*3+3]
        colors.append(tuple(dominant_color))
    return colors





In [3]:
## You have to collect and download a set of images. You have the following tasks to program, automating the process as much as possible:
### 1. Create a folder called images.

!mkdir -p ./images
!mkdir -p ./metadata

### 2. Download open-licensed images to the folder images (minimum 100 images).

kaggle.api.authenticate()
kaggle.api.dataset_download_files('nielspace/pexels-mountain-images', path='./images', unzip=True)    
    
### 3. Save metadata of every image like image size, image format (.jpeg, .png, etc.), image orientation (landscape, portrait, square, etc.), creation date, camera model, etc. in one or more JSON files. You can make use of the Exif information present in the image files.

directory = './images/Mountain'
metadata = {}
orientation = ""
images_data =[]
for filename in os.listdir(directory) :
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        image_data = {}
        path, file_extension = os.path.splitext(f)
        time_creation = os.path.getmtime(f)
        im = Image.open(f)
        size = im.size
        colors = {}
        colors["predominant_colors"] = get_predominant_colors(directory + "/" + filename)
        for idx,val in enumerate(["red","green","blue"]):
            colors[val] = Image.Image.getextrema(im)[idx]
            colors[val] = colors[val] + (ImageStat.Stat(im).median[idx],)
        print(colors)
        if size[0]>size[1] :
            orientation = "landscape"
        elif size[0] == size[1] :
            orientation = "square"
        else :
            orientation = "portrait"
        metadata[filename] = {
            "file_extension": file_extension,
            "creation_date": datetime.fromtimestamp(time_creation/1000.0).strftime("%m/%d/%Y, %H:%M:%S"),
            "size": size,
            "orientation": orientation,
            "colors": colors
            }


with open("./metadata/metadata.json", "w") as outfile:
    json.dump(metadata, outfile, indent=4)

        

{'predominant_colors': [(90, 66, 37), (192, 160, 114), (223, 202, 144)], 'red': (0, 255, 169), 'green': (0, 255, 141), 'blue': (0, 255, 72)}
{'predominant_colors': [(65, 74, 84), (135, 141, 147), (184, 186, 186)], 'red': (0, 255, 115), 'green': (0, 255, 122), 'blue': (0, 255, 129)}
{'predominant_colors': [(179, 176, 180), (248, 249, 251), (230, 229, 232)], 'red': (15, 255, 216), 'green': (12, 255, 215), 'blue': (17, 255, 219)}
{'predominant_colors': [(29, 112, 169), (36, 55, 78), (109, 176, 197)], 'red': (0, 255, 24), 'green': (0, 255, 91), 'blue': (0, 255, 171)}
{'predominant_colors': [(82, 69, 57), (231, 232, 237), (205, 204, 208)], 'red': (11, 255, 170), 'green': (6, 253, 157), 'blue': (0, 252, 150)}
{'predominant_colors': [(43, 48, 43), (213, 214, 216), (135, 139, 143)], 'red': (8, 255, 84), 'green': (15, 255, 83), 'blue': (0, 255, 79)}
{'predominant_colors': [(37, 51, 66), (212, 217, 225), (144, 147, 168)], 'red': (0, 255, 90), 'green': (0, 255, 90), 'blue': (0, 255, 118)}
{'predo

{'predominant_colors': [(21, 42, 55), (102, 143, 167), (164, 188, 199)], 'red': (0, 255, 72), 'green': (0, 244, 110), 'blue': (0, 237, 128)}
{'predominant_colors': [(74, 69, 64), (186, 188, 197), (180, 168, 173)], 'red': (0, 255, 176), 'green': (0, 255, 160), 'blue': (0, 255, 163)}
{'predominant_colors': [(157, 215, 249), (80, 133, 171), (184, 224, 241)], 'red': (0, 255, 152), 'green': (23, 255, 211), 'blue': (51, 255, 240)}
{'predominant_colors': [(71, 87, 114), (135, 144, 158), (215, 220, 225)], 'red': (0, 255, 110), 'green': (0, 255, 121), 'blue': (0, 255, 149)}
{'predominant_colors': [(249, 216, 215), (250, 225, 223), (171, 142, 147)], 'red': (27, 255, 248), 'green': (29, 245, 212), 'blue': (42, 245, 211)}
{'predominant_colors': [(8, 12, 17), (24, 32, 41), (60, 62, 69)], 'red': (0, 255, 15), 'green': (0, 255, 23), 'blue': (0, 255, 31)}
{'predominant_colors': [(33, 41, 39), (129, 132, 134), (181, 182, 184)], 'red': (3, 221, 95), 'green': (8, 222, 103), 'blue': (4, 224, 105)}
{'predo

In [6]:
# Labeling and Annotation

directory = './images/Mountain'
data_path = "./metadata/metadata.json"


if os.path.isfile(data_path):
    with open(data_path) as target:
        json_data = json.load(target)
        
for filename in os.listdir(directory) :
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        tags = {}
        tags["like"] = random.randint(0,100)
        tags["hashtag"] = "moutain"
        rand = random.randint(1, 3)
        if rand == 1 :
            tags["test"] = "yes"
        else :
            tags["test"] = "no"
        json_data[filename]["tags"] = tags
        
with open("./metadata/metadata.json", "w") as outfile:
    json.dump(json_data, outfile, indent=4)

In [7]:
from sklearn import tree
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Data Analyses

numb_users = 1
directory = './images/Mountain'
data_path = "./metadata/metadata.json"
rand = 0

# Metadata for each file
if os.path.isfile(data_path):
    with open(data_path) as target:
        json_data = json.load(target)

# Creating data for each user
json_data_users = {}        
for i in range (0, numb_users):
    images_per_users = []
    tags_per_users = []
    rand = random.randint(1, 4)
    if rand == 1:
        tags_per_users.append(["like", "colors"])
    elif rand == 2:
        tags_per_users.append("colors")
    elif rand == 3:
        tags_per_users.append(["hashtag", "colors"])
    else:
        tags_per_users.append(["like, hashtag", "colors"])
    for filename in os.listdir(directory) :
        f = os.path.join(directory, filename)
        if os.path.isfile(f):
            rand = random.randint(1, 3)
            if rand == 1 :
                images_per_users.append(filename)
                print(filename)
    json_data_users[i] = {
        "images": images_per_users,
        "tags": tags_per_users
    }


pexels-iconcom-733174.jpg
pexels---3182925.jpg
pexels-lisa-fotios-1036371.jpg
pexels-pixabay-355482.jpg
pexels-jarod-lovekamp-3791466.jpg
pexels-eberhard-grossgasteiger-701353.jpg
pexels-robby-mccullough-1867601.jpg
pexels-eberhard-grossgasteiger-1699030.jpg
pexels-eberhard-grossgasteiger-1699021.jpg
pexels-eberhard-grossgasteiger-1428277.jpg
pexels-vittorio-staffolani-655674.jpg
pexels-benjamin-suter-3733269.jpg
pexels-archie-binamira-913215.jpg
pexels-denis-linine-714258.jpg
pexels-susan-204262.jpg
pexels-pixabay-158272.jpg
pexels-eberhard-grossgasteiger-2088210.jpg
pexels-zhaocan-li-1755243.jpg
pexels-pixabay-355747.jpg
pexels-may-barros-1260841.jpg
pexels-eberhard-grossgasteiger-1287145.jpg
pexels-kasuma-908644.jpg
pexels-trace-hudson-2896668.jpg
pexels-eberhard-grossgasteiger-1624504.jpg
pexels-eberhard-grossgasteiger-1699027.jpg
pexels-eberhard-grossgasteiger-720240.jpg
pexels-billel-moula-540518.jpg
pexels-marius-venter-1659437.jpg
pexels-trace-hudson-2724664.jpg


In [9]:
# Creating dataframes to predict what the user might like
data = []
result = []
json_data_learning = dict(list(json_data.items())[len(json_data)//3:])
for i in json_data_learning:
    if i in json_data_users[0]["images"]:
        result.append('Favorite')
    else:
        result.append('NotFavorite')
    data.append([json_data_learning[i]["colors"]["predominant_colors"][0][0],
                 json_data_learning[i]["colors"]["predominant_colors"][0][1],
                 json_data_learning[i]["colors"]["predominant_colors"][0][2],
                 json_data_learning[i]["colors"]["red"][1],
                 json_data_learning[i]["colors"]["red"][2],
                 json_data_learning[i]["colors"]["green"][1],
                 json_data_learning[i]["colors"]["green"][2],
                 json_data_learning[i]["colors"]["blue"][1],
                 json_data_learning[i]["colors"]["blue"][2],
                 json_data_learning[i]["orientation"], 
                 json_data_learning[i]["size"][0],
                 json_data_learning[i]["size"][1]
                ])

dataframe = pd.DataFrame(data, columns=['predominant_red', 'predominant_green', 'predominant_blue','max_red', 'median_red','max_green', 'median_green','max_blue', 'median_blue','orientation', 'width', 'height'])
resultframe = pd.DataFrame(result, columns=['favorite'])
dataframe

Unnamed: 0,predominant_red,predominant_green,predominant_blue,max_red,median_red,max_green,median_green,max_blue,median_blue,orientation,width,height
0,166,205,232,255,166,254,193,255,220,landscape,6000,4000
1,252,252,252,255,252,255,252,255,252,portrait,4160,5200
2,81,77,74,255,125,255,116,255,109,portrait,3648,5472
3,108,133,166,255,103,251,123,255,150,landscape,7782,5191
4,76,77,77,242,191,243,192,245,194,portrait,3648,5472
...,...,...,...,...,...,...,...,...,...,...,...,...
63,17,9,4,255,52,255,29,255,8,landscape,4935,3293
64,170,155,165,255,205,255,183,254,188,landscape,3648,2432
65,58,79,72,255,102,255,122,255,115,landscape,7952,5304
66,30,48,69,255,55,250,69,255,96,portrait,2048,2560


In [None]:
import os
import math 
import matplotlib.pyplot as plot
isLiked = resultframe["favorite"]=="Favorite"
dataframeLiked = dataframe.filter(items = resultframe[isLiked]["favorite"].index, axis = 0)
grouped = dataframeLiked.groupby(['orientation']).count()
dataframeLiked
grouped = grouped.rename(columns={'width':'count'})["count"].reset_index()
grouped.plot(x=0, kind='bar', title="Orientations liked")

In [None]:
#generating numerical labels
le1 = LabelEncoder()
dataframe['orientation'] = le1.fit_transform(dataframe['orientation'])

le2 = LabelEncoder()
resultframe['favorite'] = le2.fit_transform(resultframe['favorite'])

dtc = tree.DecisionTreeClassifier()
dtc = dtc.fit(dataframe, resultframe)

prediction = dtc.predict([
        [1,
         2,
         3,
         le1.transform(['landscape'])[0],
         12,
         23
        ]])
print(le2.inverse_transform(prediction))
print(dtc.feature_importances_)