In [7]:
import pandas as pd
from pandas.core.series import Series
import abc

In [8]:
class SimilarityFunctionInterface(metaclass=abc.ABCMeta):
    """Abstract base class for similarity measures.

    Subclasses must override ``computeSimilarity``; until they do, the class
    cannot be instantiated.
    """

    @abc.abstractmethod
    def computeSimilarity(self, A, B) -> float:
        """Return the similarity between ``A`` and ``B``.

        The operand types are left to each subclass. This base implementation
        always raises ``NotImplementedError``.
        """
        raise NotImplementedError

In [9]:
class SimilarityAge(SimilarityFunctionInterface):
    """Age-based similarity between two people.

    The score is ``1 - |A - B| * (1 / (age_index - 1))``, so identical ages
    score 1 and the score drops linearly as the gap widens.
    """

    def __init__(self, age_index):
        # ``age_index - 1`` is the divisor that normalises the age gap.
        self.age_index = age_index

    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity().

        ``A`` and ``B`` are the two age values to compare; the result is one
        minus their absolute difference scaled by ``1 / (age_index - 1)``.
        """
        # Reciprocal first, then multiply: keeps the floating-point result
        # bit-for-bit stable.
        scale = 1 / (self.age_index - 1)
        gap = abs(A - B)
        return 1 - (scale * gap)
    
class SimilarityGender(SimilarityFunctionInterface):
    """Gender-based similarity between two people: a plain equality test."""

    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity().

        Returns the result of ``A == B`` as-is (it is not cast to float).
        """
        is_match = A == B
        return is_match
    
class SimilarityCountry(SimilarityFunctionInterface):
    """Country-based similarity between two people: a plain equality test."""

    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity().

        Returns the result of ``A == B`` as-is (it is not cast to float).
        """
        is_match = A == B
        return is_match

###################################################
class SimilarityDemographic(SimilarityFunctionInterface):
    """Demographic similarity between two people.

    Combines the country, age and gender similarities into a weighted sum.
    """

    def __init__(self, age_index, country_weight=0.3, age_weight=0.5, gender_weight=0.2):
        # ``age_index`` is forwarded to SimilarityAge on every comparison.
        self.age_index = age_index
        self.country_weight = country_weight
        self.age_weight = age_weight
        self.gender_weight = gender_weight

    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity().

        ``A`` and ``B`` must expose ``country``, ``age`` and ``gender``
        attributes. Returns
        ``country_sim * country_weight + age_sim * age_weight + gender_sim * gender_weight``.
        """
        by_country = SimilarityCountry().computeSimilarity(A.country, B.country)
        by_age = SimilarityAge(self.age_index).computeSimilarity(A.age, B.age)
        by_gender = SimilarityGender().computeSimilarity(A.gender, B.gender)

        # Accumulate left to right so the floating-point sum is formed in a
        # fixed order: country, then age, then gender.
        score = by_country * self.country_weight
        score = score + (by_age * self.age_weight)
        score = score + (by_gender * self.gender_weight)
        return score
    
######################################################    
class SimilarityArtworks(SimilarityFunctionInterface):
    """Compute similarity between people (by artwork tastes).

    Each person exposes three artwork collections (``positive``, ``negative``
    and ``mixed``). For every category the pairwise artwork similarities are
    summed and divided by the size of the larger collection; the three
    category scores are then combined as a weighted sum.
    """

    def __init__(self, artworks_sim, positive_weight=0.4, negative_weight=0.4, mixed_weight=0.2):
        # Pairwise artwork similarity lookup, read as artworks_sim[art1][art2].
        # It must be stored: computeSimilarity reads it on every call.
        self.artworks_sim = artworks_sim
        self.positive_weight = positive_weight
        self.negative_weight = negative_weight
        self.mixed_weight = mixed_weight

    def _category_similarity(self, first, second):
        """Sum the pairwise similarities of two artwork collections, divided by the larger size."""
        larger = max(len(first), len(second))
        if larger == 0:
            # Neither person has artworks in this category: nothing to
            # compare, and dividing would raise ZeroDivisionError.
            return 0.0
        total = sum(self.artworks_sim[art1][art2] for art1 in first for art2 in second)
        return total / larger

    def computeSimilarity(self, A, B):
        """Overrides SimilarityFunctionInterface.computeSimilarity().

        ``A`` and ``B`` must expose ``positive``, ``negative`` and ``mixed``
        collections of artwork keys usable with ``artworks_sim``. Returns the
        weighted sum of the three per-category similarities.
        """
        positive_sim = self._category_similarity(A.positive, B.positive)
        negative_sim = self._category_similarity(A.negative, B.negative)
        mixed_sim = self._category_similarity(A.mixed, B.mixed)

        return (
            (positive_sim * self.positive_weight)
            + (negative_sim * self.negative_weight)
            + (mixed_sim * self.mixed_weight)
        )

In [114]:
# class SimilarityDemographic(SimilarityFunctionInterface):
#     """Compute similarity between people (by demographic)"""
#     def __init__(self, age_index, country_weight=0.3, age_weight=0.5, gender_weight=0.2):
#         self.country_weight = country_weight
#         self.age_weight = age_weight
#         self.gender_weight = gender_weight
#         self.age_index = age_index
    
#     def computeSimilarity(self, A, B):
#         """Overrides SimilarityFuntionInterface.computeSimilarity()"""
#         country_sim = (A.country == B.country)
#         age_sim = 1 - (1 / (self.age_index - 1) * abs(A.age - B.age))
#         gender_sim = (A.gender == B.gender)
                       
#         return (country_sim*self.country_weight) + (age_sim*self.age_weight) + (gender_sim*self.gender_weight)

In [115]:
# Demographic scorer built with age_index=4; the three weights keep their defaults.
s = SimilarityDemographic(age_index=4)

In [116]:
# Two hand-written sample rows, laid out column by column.
u = pd.DataFrame(
    {
        'age': [4.1074, 2.347123],
        'gender': [2.021723, 0.000000],
        'country': [1.787405, 1.787405],
    }
)
u

Unnamed: 0,age,gender,country
0,4.1074,2.021723,1.787405
1,2.347123,0.0,1.787405


In [117]:
# A single row selected by label comes back as a Series.
type(u.loc[0])

pandas.core.series.Series

In [118]:
# Demographic similarity between the two sample rows (labels 0 and 1).
s.computeSimilarity(u.loc[0], u.loc[1])

0.5066204999999999

In [119]:
import pandas as pd

In [120]:
# Read the users CSV; the path is relative, so it resolves against the
# current working directory.
users = pd.read_csv("data/Prado_users_scaled.csv")

In [121]:
# Sanity check: the row labelled 0 compared with itself.
s.computeSimilarity(users.loc[0], users.loc[0])

1.0

In [128]:
# Remove the identifier column; `drop` returns a new frame, so `users` keeps it.
users_clean = users.drop(["userId"], axis=1)
users_clean

Unnamed: 0,age,gender,country
0,4.107464,2.021723,1.787405
1,2.347123,0.000000,1.787405
2,1.760342,0.000000,1.787405
3,1.760342,0.000000,1.787405
4,2.933903,2.021723,1.787405
...,...,...,...
166,3.520684,2.021723,1.787405
167,3.520684,2.021723,1.787405
168,2.347123,0.000000,1.787405
169,3.520684,2.021723,1.787405


In [129]:
# Display the original frame; it still carries the `userId` column.
users

Unnamed: 0,age,gender,country,userId
0,4.107464,2.021723,1.787405,1
1,2.347123,0.000000,1.787405,2
2,1.760342,0.000000,1.787405,3
3,1.760342,0.000000,1.787405,4
4,2.933903,2.021723,1.787405,5
...,...,...,...,...
166,3.520684,2.021723,1.787405,298
167,3.520684,2.021723,1.787405,299
168,2.347123,0.000000,1.787405,300
169,3.520684,2.021723,1.787405,301


In [131]:
# Full pairwise demographic similarity matrix as a list of lists:
# users_similarity[i][j] compares the i-th and j-th rows of `users_clean`.
#
# Rows are extracted once, by position, from the same frame that is being
# compared, instead of re-reading both rows for each of the n*n pairs.
user_rows = [users_clean.iloc[pos] for pos in range(len(users_clean))]

users_similarity = [
    [s.computeSimilarity(row_a, row_b) for row_b in user_rows]
    for row_a in user_rows
]

In [132]:
# Spot check of one diagonal entry: the user at position 2 compared with itself.
print(users_similarity[2][2])

1.0


In [133]:
for i in range(len(users_similarity)):
    users_similarity