In [1]:
import pandas as pd
import numpy as np
from math import pi

In [2]:
# Load the raw ratings table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="programme_ratings.csv")
# Show the loaded frame as the cell output.
df

Unnamed: 0.1,Unnamed: 0,Maciej Garbacz,Maciej Bober,Tomasz Soróbka,Greta Slivinskaitė,Łukasz Graff,Paula Wołkowska
0,Mathematics,1,1,0,0,2,1
1,Electrical Engineering,2,0,1,-2,-1,1
2,Mechanical Engineering,1,2,0,0,-2,-1
3,Industrial Design,-2,-2,1,1,0,0
4,Biomedical Engineering,0,1,-1,1,2,-1
5,Business,-1,1,1,0,0,1
6,Psychology,-2,-2,-1,1,1,-1
7,Physics,1,1,-1,1,1,1


In [3]:
# Keep every column except the first one (presumably the programme-name
# column -- verify against the CSV), as an independent copy of `df`.
ratings_df = df.iloc[:, 1:].copy()
# The same values as a plain NumPy matrix: one row per table row, one column per rater.
ratings = ratings_df.to_numpy()
ratings

array([[ 1,  1,  0,  0,  2,  1],
       [ 2,  0,  1, -2, -1,  1],
       [ 1,  2,  0,  0, -2, -1],
       [-2, -2,  1,  1,  0,  0],
       [ 0,  1, -1,  1,  2, -1],
       [-1,  1,  1,  0,  0,  1],
       [-2, -2, -1,  1,  1, -1],
       [ 1,  1, -1,  1,  1,  1]], dtype=int64)

In [4]:
# Mean rating of every row of `ratings` (i.e. the average over all raters),
# as a float64 vector with one entry per row.
# ndarray.mean(axis=1) replaces the Python-level loop wrapped in np.asfarray;
# np.asfarray was removed in NumPy 2.0, so the old form no longer runs there.
mean_vector = ratings.mean(axis=1)
mean_vector

array([ 0.83333333,  0.16666667,  0.        , -0.33333333,  0.33333333,
        0.33333333, -0.66666667,  0.66666667])

In [17]:
# Map each column name of `ratings_df` (a rater) to that column's values as a
# float vector.
# - DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
# - to_numpy(dtype=float, copy=True) replaces np.asfarray (removed in
#   NumPy 2.0); copy=True keeps each vector independent of the DataFrame,
#   as the original list-based conversion did.
vectors = {
    column_name: column_data.to_numpy(dtype=float, copy=True)
    for column_name, column_data in ratings_df.items()
}

def get_angle(v1, v2) -> float:
    """Return the angle between two vectors, in degrees.

    Parameters
    ----------
    v1, v2 : array-like of numbers
        One-dimensional vectors of equal length.

    Returns
    -------
    float
        The angle in the range [0, 180].  ``nan`` is returned when either
        vector has zero length, because the angle is undefined in that case.
    """
    a = np.asarray(v1, dtype=float)
    b = np.asarray(v2, dtype=float)

    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        # A zero vector has no direction; return nan explicitly instead of
        # triggering a 0/0 division warning.
        return float("nan")

    # Floating-point rounding can push the cosine marginally outside [-1, 1]
    # (e.g. for parallel vectors), which would make arccos return nan.
    cosine = np.clip(np.dot(a, b) / norm_product, -1.0, 1.0)

    # arccos already yields values in [0, pi], so no folding of angles above
    # 180 degrees is needed.
    return float(np.degrees(np.arccos(cosine)))

def is_vector_zero(v) -> bool:
    """Tell whether every component of ``v`` equals zero.

    An empty vector counts as zero.
    """
    for component in v:
        if not (component == 0):
            # First non-zero component settles the answer.
            return False
    return True
    
# Despite its name, this dict holds the angle in degrees (as returned by
# get_angle) between each rater's vector and the mean vector.
dot_products = {
    rater: get_angle(rater_vector, mean_vector)
    for rater, rater_vector in vectors.items()
}

print(dot_products)

{'Maciej Garbacz': 51.134449497978274, 'Maciej Bober': 41.667286371890285, 'Tomasz Soróbka': 92.79685838439701, 'Greta Slivinskaitė': 94.84816052676035, 'Łukasz Graff': 66.34748847052455, 'Paula Wołkowska': 50.76847951640775}


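Most average (smallest angle to the mean vector) -> Maciej Bober (≈ 41.7°)
Most extreme (largest angle to the mean vector) -> Greta Slivinskaitė (≈ 94.8°)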

In [24]:
# Partial rating profile to complete.  It is zipped position-by-position with
# each rater's vector; None marks a position whose rating is not known yet.
PREDICTION_RATINGS = [0, 0, None, None, None, None, None, None]

# The known ratings only, as a float vector.  np.array(..., dtype=float) is
# used because np.asfarray was removed in NumPy 2.0.
rating_vector = np.array(
    [num for num in PREDICTION_RATINGS if num is not None],
    dtype=float,
)

# Every rater's vector restricted to the positions where a rating is known.
prediction_vectors_with_zeros = {
    person: np.array(
        [
            num
            for num, prediction_num in zip(vector, PREDICTION_RATINGS)
            if prediction_num is not None
        ],
        dtype=float,
    )
    for person, vector in vectors.items()
}

# The same restricted vectors without the all-zero ones: a zero vector has no
# direction, so it cannot take part in the angle comparison below.
prediction_vectors = {
    person: vector
    for person, vector in prediction_vectors_with_zeros.items()
    if not is_vector_zero(vector)
}

print(prediction_vectors_with_zeros)

if is_vector_zero(rating_vector):
    # The known ratings are all zero, so no angle can be computed; pick the
    # rater whose restricted vector is shortest (closest to all-zero).
    closest_person = min(
        prediction_vectors_with_zeros.items(),
        key=lambda x: np.linalg.norm(x[1])
    )[0]
elif not prediction_vectors:
    # Every rater gave only zeros at the known positions: fail with a clear
    # message instead of the opaque error min() raises on an empty input.
    raise ValueError(
        "cannot pick a closest person: every rater's vector is zero "
        "at the positions with known ratings"
    )
else:
    # Smallest angle to the known ratings wins; ties go to the first rater.
    angles = {
        person: get_angle(vector, rating_vector)
        for person, vector
        in prediction_vectors.items()
    }
    closest_person = min(angles.items(), key=lambda x: x[1])[0]
    

{'Maciej Garbacz': array([1., 2.]), 'Maciej Bober': array([1., 0.]), 'Tomasz Soróbka': array([0., 1.]), 'Greta Slivinskaitė': array([ 0., -2.]), 'Łukasz Graff': array([ 2., -1.]), 'Paula Wołkowska': array([1., 1.])}
Maciej Bober


In [8]:
# Display the rater chosen as the closest match.
# NOTE(review): this cell's execution count (8) is lower than the cells around
# it (24, 26) -- re-run the notebook top to bottom to confirm the shown value.
closest_person

'Maciej Bober'

In [26]:
# Full rating vector of the closest rater; raises KeyError if the name is not
# a key of `vectors`.
closest_person_vector = vectors[closest_person]

In [27]:
# Complete the partial profile: keep every rating that is already known and
# fill each None slot with the closest rater's value at the same position.
# Known entries keep their original type, so the list can mix ints and floats.
predicted_vector = [
    known_rating if known_rating is not None else neighbour_rating
    for neighbour_rating, known_rating
    in zip(closest_person_vector, PREDICTION_RATINGS)
]
predicted_vector

[0, 0, 2.0, -2.0, 1.0, 1.0, -2.0, 1.0]