In [1]:
import pandas as pd
import numpy as np
from pandas.core.series import Series
import abc
import json
import colorsys
import math

In [2]:
USERS_CSV = 'data/Prado_users.csv'
USERS_SCALED_CSV = 'data/Prado_users_scaled.csv'
ARTWORKS_CSV = 'data/Prado_artworks_wikidata.csv'

USERS_EMOTIONS_CSV = 'data/Prado_users_emotions.csv'
USERS_EMOTIONS_SCALED_CSV = 'data/Prado_users_emotions_scaled.csv'

In [3]:
class SimilarityFunctionInterface(metaclass=abc.ABCMeta):
    
    @abc.abstractmethod
    def computeSimilarity(self, A, B) -> float:
        """Compute similarity between two lists"""
        raise NotImplementedError()

In [97]:
class SimilarityAge(SimilarityFunctionInterface):
    """Compute similarity between users (by age)"""
    def __init__(self, age_index, data_csv=USERS_EMOTIONS_SCALED_CSV):
        self.age_index = age_index
        self.data = pd.read_csv(data_csv)
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        ageA = self.data.loc[self.data['userId'] == A]['age'].to_list()[0]
        ageB = self.data.loc[self.data['userId'] == B]['age'].to_list()[0]
        return 1 - (1 / (self.age_index - 1) * abs(ageA - ageB))
    
class SimilarityGender(SimilarityFunctionInterface):
    """Compute similarity between users (by age)"""
    def __init__(self, data_csv=USERS_EMOTIONS_SCALED_CSV):
        self.data = pd.read_csv(data_csv)
      
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        genderA = self.data.loc[self.data['userId'] == A]['gender'].to_list()[0]
        genderB = self.data.loc[self.data['userId'] == B]['gender'].to_list()[0]
        return genderA == genderB
    
class SimilarityCountry(SimilarityFunctionInterface):
    """Compute similarity between users (by age)"""
    def __init__(self, data_csv=USERS_EMOTIONS_SCALED_CSV):
        self.data = pd.read_csv(data_csv)
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        countryA = self.data.loc[self.data['userId'] == A]['country'].to_list()[0]
        countryB = self.data.loc[self.data['userId'] == B]['country'].to_list()[0]
        return countryA == countryB

###################################################
class SimilarityDemographic(SimilarityFunctionInterface):
    """Compute similarity between users (by demographic)"""
    def __init__(self, age_index, country_weight=0.3, age_weight=0.5, gender_weight=0.2, data_csv=USERS_EMOTIONS_SCALED_CSV):
        self.country_weight = country_weight
        self.age_weight = age_weight
        self.gender_weight = gender_weight
        self.age_index = age_index
        self.data = pd.read_csv(data_csv)
    
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFuntionInterface.computeSimilarity()"""
        country_sim = SimilarityCountry().computeSimilarity(A, B)
        age_sim = SimilarityAge(self.age_index).computeSimilarity(A, B)
        gender_sim = SimilarityGender().computeSimilarity(A, B)
                       
        return (country_sim*self.country_weight) + (age_sim*self.age_weight) + (gender_sim*self.gender_weight)
    
######################################################    
class SimilarityPolarity(SimilarityFunctionInterface):
    """Compute similarity between users (by artwork tastes)"""
    def __init__(self, artworks_sim, positive_weight=0.4, negative_weight=0.4, mixed_weight=0.2, data_csv=USERS_EMOTIONS_SCALED_CSV):
        self.positive_weight = positive_weight
        self.negative_weight = negative_weight
        self.mixed_weight = mixed_weight
        self.artworks_sim = artworks_sim
        self.data = pd.read_csv(data_csv)
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        positiveA = self.data.loc[self.data['userId'] == A]['positive'].apply(eval).to_list()[0]
        positiveB = self.data.loc[self.data['userId'] == B]['positive'].apply(eval).to_list()[0]
        negativeA = self.data.loc[self.data['userId'] == A]['negative'].apply(eval).to_list()[0]
        negativeB = self.data.loc[self.data['userId'] == B]['negative'].apply(eval).to_list()[0]
        mixedA = self.data.loc[self.data['userId'] == A]['mixed'].apply(eval).to_list()[0]
        mixedB = self.data.loc[self.data['userId'] == B]['mixed'].apply(eval).to_list()[0]
        
        positive_sim = 0
        i = 0
        for art1 in positiveA:
            for art2 in positiveB:
                positive_sim += self.artworks_sim[art1][art2]
                i += 1
        positive_sim /= i if i > 0 else 1
        
        negative_sim = 0
        i = 0
        for art1 in negativeA:
            for art2 in negativeB:
                negative_sim += self.artworks_sim[art1][art2]
                i += 1
        negative_sim /= i if i > 0 else 1
        
        mixed_sim = 0
        i = 0
        for art1 in mixedA:
            for art2 in mixedB:
                mixed_sim += self.artworks_sim[art1][art2]
                i += 1
        mixed_sim /= i if i > 0 else 1
        
        return (positive_sim * self.positive_weight) + (negative_sim * self.negative_weight) + (mixed_sim * self.mixed_weight)
    
######################################################    
class SimilarityUsers(SimilarityFunctionInterface):
    """Compute similarity between users"""
    def __init__(self, age_index, artworks_sim, demog_weight = 0.5, artw_weight = 0.5):
        self.demog_weight = demog_weight
        self.artw_weight = artw_weight
        self.age_index = age_index
        self.artworks_sim = artworks_sim
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        demog_sim = SimilarityDemographic(self.age_index).computeSimilarity(A, B)
        artw_sim = SimilarityPolarity(self.artworks_sim).computeSimilarity(A, B)                                                
        return (self.demog_weight * demog_sim) + (self.artw_weight * artw_sim)

In [98]:
class SimilarityArtist(SimilarityFunctionInterface):
    """Compute similarity between artworks (by artist)"""
    def __init__(self, data_csv=ARTWORKS_CSV):
        self.data = pd.read_csv(data_csv)
    
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        return (self.data.loc[self.data['ID'] == A]['Artist'].to_list()[0] == self.data.loc[self.data['ID'] == B]['Artist'].to_list()[0])
        
class SimilarityCategory(SimilarityFunctionInterface):
    """Compute similarity between artworks (by category)"""
    def __init__(self, data_csv=ARTWORKS_CSV):
        self.data = pd.read_csv(data_csv)

    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        return(self.data.loc[self.data['ID'] == A]['Category'].to_list()[0] == self.data.loc[self.data['ID'] == B]['Category'].to_list()[0])

class SimilarityColors(SimilarityFunctionInterface):
    """Compute similarity between artworks (by color)"""
    def __init__(self, data_csv=ARTWORKS_CSV, colors_json='data/artworkColors.json'):
        with open(colors_json)as f:
            self.colors = json.load(f)
            self.data = pd.read_csv(data_csv) 
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        A = list(self.data[self.data['ID'] == A]['wd:paintingID'])[0]
        B = list(self.data[self.data['ID'] == B]['wd:paintingID'])[0]
        max_a = int(max(self.colors.get(A).get('frequency'), key=self.colors.get(A).get('frequency').get))
        max_b = int(max(self.colors.get(B).get('frequency'), key=self.colors.get(B).get('frequency').get))
        
        rgb_a = self.colors.get(A).get('colors')[max_a]
        rgb_b = self.colors.get(B).get('colors')[max_b]
        
        hsv_a = colorsys.rgb_to_hsv(rgb_a[0], rgb_a[1], rgb_a[2])
        hsv_b = colorsys.rgb_to_hsv(rgb_b[0], rgb_b[1], rgb_b[2])
        
        ### Sacado de año anterior ###
        dh = min(abs(hsv_a[0]-hsv_b[0]), 360-abs(hsv_a[0]-hsv_b[0])) / 180.0
        ds = abs(hsv_a[1] - hsv_b[1])
        dv = abs(hsv_a[2] - hsv_b[2]) / 255.
        distance = math.sqrt(dh * dh + ds * ds + dv * dv)
        return round(1. - (distance), 2)
    
######################################################    
class SimilarityArtworks(SimilarityFunctionInterface):
    """Compute similarity between artworks"""
    def __init__(self, artist_weight=0.3, color_weight=0.3, category_weight = 0.4):
        self.artist_weight = artist_weight
        self.color_weight = color_weight
        self.category_weight = category_weight
        
    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity()"""
        artist_sim = SimilarityArtist().computeSimilarity(A, B)
        color_sim = SimilarityColors().computeSimilarity(A, B)
        cat_sim = SimilarityCategory().computeSimilarity(A,B)
        
        return (self.artist_weight * artist_sim) + (self.color_weight * color_sim)  + (self.category_weight * cat_sim)

In [99]:
SimilarityArtworks().computeSimilarity('57726f48edc2cb3880ba012f', '57726f56edc2cb3880ba7233')

0.88

In [100]:
SimilarityUsers(age_index=9, artworks_sim=simCuadros).computeSimilarity(1, 2)

0.41624931509712826

In [11]:
aux = pd.read_csv(ARTWORKS_CSV)

similitudCuadros=np.zeros((len(aux),len(aux)))
i = 0
for a in aux['ID']:
    j = 0
    for b in aux['ID']:
        similitudCuadros[i][j] = SimilarityArtworks().computeSimilarity(a, b)
        j +=1
    i+=1

In [12]:
simCuadros = pd.DataFrame(similitudCuadros, index = [i for i in aux['ID']], columns = [i for i in aux['ID']])
simCuadros.to_csv('data/simCuadros.csv')

In [20]:
# class SimilarityDemographic(SimilarityFunctionInterface):
#     """Compute similarity between people (by demographic)"""
#     def __init__(self, age_index, country_weight=0.3, age_weight=0.5, gender_weight=0.2):
#         self.country_weight = country_weight
#         self.age_weight = age_weight
#         self.gender_weight = gender_weight
#         self.age_index = age_index
    
#     def computeSimilarity(self, A, B):
#         """Overrides SimilarityFuntionInterface.computeSimilarity()"""
#         country_sim = (A.country == B.country)
#         age_sim = 1 - (1 / (self.age_index - 1) * abs(A.age - B.age))
#         gender_sim = (A.gender == B.gender)
                       
#         return (country_sim*self.country_weight) + (age_sim*self.age_weight) + (gender_sim*self.gender_weight)

In [21]:
s = SimilarityDemographic(4)

In [22]:
u = pd.DataFrame([[4.1074, 2.021723, 1.787405], [2.347123, 0.000000, 1.787405]], columns=['age', 'gender', 'country'])
u

Unnamed: 0,age,gender,country
0,4.1074,2.021723,1.787405
1,2.347123,0.0,1.787405


In [23]:
type(u.loc(0)[0])

pandas.core.series.Series

In [24]:
s.computeSimilarity(u.loc(0)[0], u.loc(0)[1])

0.5066204999999999

In [25]:
import pandas as pd

In [26]:
users = pd.read_csv("data/Prado_users_emotions_scaled.csv")
users['positive'] = users['positive'].apply(eval)
users['negative'] = users['negative'].apply(eval)
users['mixed'] = users['mixed'].apply(eval)

In [27]:
users.loc(0)[0]

age                                                  4.107464
gender                                               2.021723
country                                              1.787405
userId                                                      1
positive    [57726f4cedc2cb3880ba07ed, 57726f4cedc2cb3880b...
negative    [577278d5edc2cb3880d94196, 57727015edc2cb3880b...
mixed       [5772700eedc2cb3880bc82ce, 5772700eedc2cb3880b...
Name: 0, dtype: object

In [46]:
s = SimilarityUsers(age_index = 9, artworks_sim = simCuadros)
users_similarity = []

for i in range(0, len(users)):
    sim_list = []
    for j in range(0, len(users)):
        sim = s.computeSimilarity(users.loc(0)[i], users.loc(0)[j])
        if(sim >= 0):
            sim_list.append(sim)
        else:
            sim_list.append(0)
        
    users_similarity.append(sim_list)

TypeError: object of type 'SimilarityUsers' has no len()

In [29]:
userSim = pd.DataFrame(users_similarity)
userSim
userSim.to_csv('data/usersSim.csv')

In [30]:
s.computeSimilarity(users_clean.loc(0)[3], users_clean.loc(0)[2])

NameError: name 'users_clean' is not defined