In [86]:
import pandas as pd
import numpy as np
from pandas.core.series import Series
import abc
import json
import colorsys
import math

In [87]:
USERS_CSV = 'data/Prado_users.csv'
USERS_SCALED_CSV = 'data/Prado_users_scaled.csv'
ARTWORKS_CSV = 'data/Prado_artworks_wikidata.csv'

In [88]:
aux = pd.read_csv(ARTWORKS_CSV)[['wd:paintingID', 'Artist', 'Category']] 
print(aux.loc[aux['wd:paintingID']=='Q920030']['Artist'].to_list()[0])
print(aux.loc[aux['wd:paintingID']=='Q952796']['Artist'].to_list()[0])

(aux.loc[aux['wd:paintingID']=='Q920030']['Artist'].to_list()[0]) == (aux.loc[aux['wd:paintingID']=='Q952796']['Artist'].to_list()[0])

Francisco Goya
Francisco Goya


True

In [89]:
class SimilarityFunctionInterface(metaclass=abc.ABCMeta):
    
    @abc.abstractmethod
    def computeSimilarity(self, A, B) -> float:
        """Compute similarity between two lists"""
        raise NotImplementedError()

In [90]:
class SimilarityAge(SimilarityFunctionInterface):
    """Compute similarity between people (by age)"""
    def __init__(self, age_index):
        self.age_index = age_index
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        return 1 - (1 / (self.age_index - 1) * abs(A - B))
    
class SimilarityGender(SimilarityFunctionInterface):
    """Compute similarity between people (by age)"""
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        return A == B
    
class SimilarityCountry(SimilarityFunctionInterface):
    """Compute similarity between people (by age)"""
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        return A == B

###################################################
class SimilarityDemographic(SimilarityFunctionInterface):
    """Compute similarity between people (by demographic)"""
    def __init__(self, age_index, country_weight=0.3, age_weight=0.5, gender_weight=0.2):
        self.country_weight = country_weight
        self.age_weight = age_weight
        self.gender_weight = gender_weight
        self.age_index = age_index
    
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        country_sim = SimilarityCountry().computeSimilarity(A.country, B.country)
        age_sim = SimilarityAge(self.age_index).computeSimilarity(A.age, B.age)
        gender_sim = SimilarityGender().computeSimilarity(A.gender, B.gender)
                       
        return (country_sim*self.country_weight) + (age_sim*self.age_weight) + (gender_sim*self.gender_weight)
    
######################################################    
class SimilarityPolarity(SimilarityFunctionInterface):
    """Compute similarity between people (by artwork tastes)"""
    def __init__(self, artworks_sim, positive_weight=0.4, negative_weight=0.4, mixed_weight=0.2):
        self.positive_weight = positive_weight
        self.negative_weight = negative_weight
        self.mixed_weight = mixed_weight
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        positive_sim = 0
        for art1 in A.positive:
            for art2 in B.positive:
                positive_sim += self.similitudCuadros[art1][art2]
        positive_sim /= len(A.positive) if len(A.positive) > len(B.positive) else len(B.positive)
        
        negative_sim = 0
        for art1 in A.negative:
            for art2 in B.negative:
                negative_sim += self.similitudCuadros[art1][art2]
        negative_sim /= len(A.negative) if len(A.negative) > len(B.negative) else len(B.negative)
        
        mixed_sim = 0
        for art1 in A.mixed:
            for art2 in B.mixed:
                mixed_sim += self.similitudCuadros[art1][art2]
        mixed_sim /= len(A.mixed) if len(A.mixed) > len(B.mixed) else len(B.mixed)
        
        return (positive_sim * self.positive_weight) + (negative_sim * self.negative_weight) + (mixed * self.mixed_weight)

In [91]:
class SimilarityArtist(SimilarityFunctionInterface):
    """Compute similarity between artworks (by artist)"""
    def __init__(self, data_csv=ARTWORKS_CSV):
        self.data = pd.read_csv(data_csv)
    
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        return (self.data.loc[aux['wd:paintingID'] == A]['Artist'].to_list()[0] == self.data.loc[aux['wd:paintingID'] == B]['Artist'].to_list()[0])
        
class SimilarityCategory(SimilarityFunctionInterface):
    """Compute similarity between artworks (by category)"""
    def __init__(self, data_csv=ARTWORKS_CSV):
        self.data = pd.read_csv(data_csv)

    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        return(self.data.loc[aux['wd:paintingID'] == A]['Category'].to_list()[0] == self.data.loc[aux['wd:paintingID'] == B]['Category'].to_list()[0])

class SimilarityColors(SimilarityFunctionInterface):
    """Compute similarity between artworks (by color)"""
    def __init__(self, data_csv=ARTWORKS_CSV, colors_json='data/artworkColors.json'):
        with open(colors_json)as f:
            self.colors = json.load(f)
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        max_a = int(max(self.colors.get(A).get('frequency'), key=self.colors.get(A).get('frequency').get))
        max_b = int(max(self.colors.get(B).get('frequency'), key=self.colors.get(B).get('frequency').get))
        
        rgb_a = self.colors.get(A).get('colors')[max_a]
        rgb_b = self.colors.get(B).get('colors')[max_b]
        
        hsv_a = colorsys.rgb_to_hsv(rgb_a[0], rgb_a[1], rgb_a[2])
        hsv_b = colorsys.rgb_to_hsv(rgb_b[0], rgb_b[1], rgb_b[2])
        
        ### Sacado de año anterior ###
        dh = min(abs(hsv_a[0]-hsv_b[0]), 360-abs(hsv_a[0]-hsv_b[0])) / 180.0
        ds = abs(hsv_a[1] - hsv_b[1])
        dv = abs(hsv_a[2] - hsv_b[2]) / 255.
        distance = math.sqrt(dh * dh + ds * ds + dv * dv)
        return round(1. - (distance), 2)
    
######################################################    
class SimilarityArtworks(SimilarityFunctionInterface):
    """Compute similarity between artworks"""
    def __init__(self, artist_weight=0.3, color_weight=0.3, category_weight = 0.4):
        self.artist_weight = artist_weight
        self.color_weight = color_weight
        self.category_weight = category_weight
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        artist_sim = SimilarityArtist().computeSimilarity(A, B)
        color_sim = SimilarityColors().computeSimilarity(A, B)
        cat_sim = SimilarityCategory().computeSimilarity(A,B)
        
        return (self.artist_weight * artist_sim) + (self.color_weight * color_sim)  + (self.category_weight * cat_sim)
######################################################    
class SimilarityPerson(SimilarityFunctionInterface):
    """Compute similarity between artworks"""
    def __init__(self, demog_weight = 0.5, artw_weight = 0.5):
        self.demog_weight = demog_weight
        self.artw_weight = artw_weight
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        demog_sim = SimilarityDemographic().computeSimilarity(A, B)
        artw_sim = SimilarityColors().computeSimilarity(A, B)                                                
        return (self.demog_weight * demog_sim) + (self.artw_weight * artw_sim)

In [92]:
SimilarityArtworks().computeSimilarity('Q920030', 'Q124362')

0.10200000000000001

In [93]:
similitudCuadros=np.zeros((len(aux),len(aux)))
i = 0
for a in aux['wd:paintingID']:
    j = 0
    for b in aux['wd:paintingID']:
        similitudCuadros[i][j] = SimilarityArtworks().computeSimilarity(a, b)
        j +=1
    i+=1

In [94]:
simCuadros = pd.DataFrame(similitudCuadros, index = [i for i in aux['wd:paintingID']], columns = [i for i in aux['wd:paintingID']])
simCuadros.to_csv('data/simCuadros.csv')

In [95]:
# class SimilarityDemographic(SimilarityFunctionInterface):
#     """Compute similarity between people (by demographic)"""
#     def __init__(self, age_index, country_weight=0.3, age_weight=0.5, gender_weight=0.2):
#         self.country_weight = country_weight
#         self.age_weight = age_weight
#         self.gender_weight = gender_weight
#         self.age_index = age_index
    
#     def computeSimilarity(self, A, B):
#         """Overrides SimilarityFuntionInterface.computeSimilarity()"""
#         country_sim = (A.country == B.country)
#         age_sim = 1 - (1 / (self.age_index - 1) * abs(A.age - B.age))
#         gender_sim = (A.gender == B.gender)
                       
#         return (country_sim*self.country_weight) + (age_sim*self.age_weight) + (gender_sim*self.gender_weight)

In [96]:
s = SimilarityDemographic(4)

In [97]:
u = pd.DataFrame([[4.1074, 2.021723, 1.787405], [2.347123, 0.000000, 1.787405]], columns=['age', 'gender', 'country'])
u

Unnamed: 0,age,gender,country
0,4.1074,2.021723,1.787405
1,2.347123,0.0,1.787405


In [98]:
type(u.loc(0)[0])

pandas.core.series.Series

In [99]:
s.computeSimilarity(u.loc(0)[0], u.loc(0)[1])

0.5066204999999999

In [100]:
import pandas as pd

In [101]:
users = pd.read_csv("data/Prado_users_scaled.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'data/Prado_users_scaled.csv'

In [None]:
s.computeSimilarity(users.loc(0)[0], users.loc(0)[0])

In [None]:
users_clean = users.drop(columns="userId")
users_clean

In [None]:
users

In [None]:
users_similarity = []
for i in range(0, len(users)):
    sim_list = []
    for j in range(0, len(users)):
        sim = s.computeSimilarity(users_clean.loc(0)[i], users_clean.loc(0)[j])
        sim_list.append(sim)
        
    users_similarity.append(sim_list)

In [None]:
print(users_similarity[2][2])

In [None]:
for i in range(len(users_similarity)):
    users_similarity