# Inicialización de datos

Se abren los archivos de películas y de ratings y se crean los diccionarios en memoria donde se cargarán sus datos para usarlos en las celdas siguientes.

In [130]:
# Open both data files up front; the loading cells further down iterate over
# these handles line by line.
movies_file = open('movies.dat', mode='r', encoding="utf8")
ratings_file = open('ratings.dat', mode='r', encoding="utf8")

# matrix: movie_id -> movie record (name, tags, ratings, counters).
# users_matrix: user_id -> {movie_id: rating}.
matrix, users_matrix = {}, {}

In [131]:
def insert_sorted(a, tuple_value):
    """Insert a 3-tuple into ``a`` while keeping ``a`` in descending order.

    Entries are ordered by their second field (index 1), ties being broken
    by the third field (index 2), both from largest to smallest. An entry
    that compares equal to existing ones is placed after them.

    Args:
        a: List of 3-tuples already ordered as described above; it is
            modified in place.
        tuple_value: The ``(key, value, count)`` tuple to insert.
    """
    _key, new_value, new_count = tuple_value
    left, right = 0, len(a)
    # Binary search for the first slot whose entry ranks strictly below the
    # new one.
    while left < right:
        middle = (left + right) // 2
        current = a[middle]
        ranks_higher = new_value > current[1] or (
            new_value == current[1] and new_count > current[2]
        )
        if ranks_higher:
            right = middle
        else:
            left = middle + 1
    a.insert(left, tuple_value)

In [132]:
# Build one record per movie, keyed by movie id. Each line is expected to be
# ``movie_id::name::tag1|tag2|...``.
# The handle is fully consumed here, so it is closed as soon as the loop ends
# instead of staying open for the lifetime of the kernel. Re-running this cell
# therefore requires re-running the cell that opens the file.
with movies_file:
    for line in movies_file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        movie_id, name, tags = line.split('::')
        matrix[movie_id] = {
            'name': name,
            'tags': tags.split('|'),
            'ratings': dict(),      # user_id -> rating, filled by the ratings loader
            'ratings_count': 0,
            'total_rating': 0,      # mean rating, filled by the ratings loader
        }

In [133]:
# Load every rating into both indexes:
#   matrix[movie_id]['ratings'][user_id] and users_matrix[user_id][movie_id].
# Each line is expected to be ``user_id::movie_id::rating::<ignored field>``.
#
# The mean rating is derived from an exact running sum and rounded once at the
# end. Rounding the running mean after every rating makes it stop moving once
# a movie has enough ratings, because each new rating shifts the mean by less
# than the rounding step.
rating_sums = dict()  # movie_id -> sum of all ratings seen for that movie

# The handle is fully consumed here, so close it when the loop finishes.
with ratings_file:
    for line in ratings_file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        user_id, movie_id, raw_rating, _ = line.split('::')
        rating_value = float(raw_rating)

        movie = matrix[movie_id]
        movie['ratings'][user_id] = rating_value
        movie['ratings_count'] += 1
        rating_sums[movie_id] = rating_sums.get(movie_id, 0.0) + rating_value

        if user_id not in users_matrix:
            users_matrix[user_id] = dict()
        users_matrix[user_id][movie_id] = rating_value

# Movies without any rating keep their initial 'total_rating'.
for movie_id, rating_sum in rating_sums.items():
    matrix[movie_id]['total_rating'] = round(
        rating_sum / matrix[movie_id]['ratings_count'], 1
    )
        

In [134]:
# Rank all movies from most to least rated; movies with the same number of
# ratings are ordered by mean rating, highest first. Each entry is
# (movie_id, ratings_count, total_rating). The sort is stable, so entries that
# tie on both fields keep the iteration order of ``matrix``.
most_popular_movies = sorted(
    (
        (movie_id, movie['ratings_count'], movie['total_rating'])
        for movie_id, movie in matrix.items()
    ),
    key=lambda entry: (entry[1], entry[2]),
    reverse=True,
)

# Solicitar ratings

In [169]:
print('A continuación deberás puntuar 10 peliculas de acuerdo a tus gustos.')
print('Tu puntiación debe estar entre 0.5 y 5.0, en intervalos de 0.5, si no deseas opinar sobre una pelicula marca 0')

# Start from an empty profile and add one entry per answered movie.
# Assigning a brand-new dict on every answer would discard all previous
# answers and keep only the last one. Creating the profile up front also
# guarantees that 'new_user' exists even when every movie is skipped.
users_matrix['new_user'] = dict()

for movie_id, _, _ in most_popular_movies[:10]:
    # Keep asking about the same movie until the answer is valid.
    while True:
        try:
            rating = float(input('{}: '.format(matrix[movie_id]['name'])))

            # Accept only multiples of 0.5 in [0, 5]; 0 means "no opinion".
            if rating % 0.5 != 0 or rating < 0 or rating > 5:
                raise ValueError()
        except ValueError:
            print('Has ingresado de manera incorrecta el rating, recuerda que debe ser múltiplo de 0.5 y estar entre 0.5 y 5.0')
            continue

        if rating != 0:
            users_matrix['new_user'][movie_id] = rating
        break
        

A continuación deberás puntuar 10 peliculas de acuerdo a tus gustos.
Tu puntiación debe estar entre 0.5 y 5.0, en intervalos de 0.5, si no deseas opinar sobre una pelicula marca 0
Pulp Fiction (1994): 0.5
Forrest Gump (1994): 0.5
Silence of the Lambs, The (1991): 0.5
Jurassic Park (1993): 0.5
Shawshank Redemption, The (1994): 0.5
Braveheart (1995): 0.5
Fugitive, The (1993): 0.5
Terminator 2: Judgment Day (1991): 0.5
Star Wars: Episode IV - A New Hope (a.k.a. Star Wars) (1977): 0.5
Apollo 13 (1995): 0.5


In [182]:
def dist(user_1, user_2):
    """Compare the ratings of two users stored in ``users_matrix``.

    Only movies rated by both users contribute to the distance.

    Args:
        user_1: Key in ``users_matrix`` of the reference user.
        user_2: Key in ``users_matrix`` of the user being compared.

    Returns:
        A 3-tuple ``(distance, not_seen_movies, evaluated_movies)``:
        the square root of the summed squared rating differences over the
        movies both users rated, the set of movie ids rated by ``user_2``
        but not by ``user_1``, and the number of movies both users rated.
    """
    squared_error = 0
    shared_count = 0
    missing_for_user_1 = set()
    for movie_id, other_rating in users_matrix[user_2].items():
        if movie_id not in users_matrix[user_1]:
            missing_for_user_1.add(movie_id)
            continue
        difference = other_rating - users_matrix[user_1][movie_id]
        squared_error += difference ** 2
        shared_count += 1

    return squared_error ** (1/2), missing_for_user_1, shared_count

def get_sim_users(radius=None):
    """Find the users whose ratings resemble those of ``'new_user'``.

    A user qualifies when they share more than one rated movie with
    ``'new_user'`` and, if ``radius`` is given, their distance (as computed
    by ``dist``) is strictly below ``radius``.

    Args:
        radius: Optional upper bound (exclusive) on the distance. ``None``
            disables the distance filter.

    Returns:
        A 2-tuple ``(users, all_movies)``. ``users`` is a list of
        ``(user_id, similarity, not_seen_count)`` tuples ordered by
        ``insert_sorted`` (highest similarity first), where
        ``similarity = 1 / (1 + distance)``. ``all_movies`` is the set of
        movie ids rated by at least one qualifying user but not by
        ``'new_user'``.
    """
    users = []
    all_movies = set()
    for user in users_matrix:
        if user == 'new_user':
            continue
        distance, not_seen_movies, evaluated_movies = dist('new_user', user)
        if (radius is None or distance < radius) and evaluated_movies > 1:
            # Map the distance into (0, 1]: identical ratings give 1.
            sim = 1 / (1 + distance)
            insert_sorted(users, (user, sim, len(not_seen_movies)))
            # Grow the candidate set in place instead of rebuilding it on
            # every qualifying user.
            all_movies |= not_seen_movies
    return users, all_movies

In [183]:
# Predict a rating for every candidate movie as the similarity-weighted mean
# of the ratings given by the similar users:
#     predicted = sum(rating * similarity) / sum(similarity)
sim_users, movies_to_recommend = get_sim_users()

# Accumulate both sums in a single pass over the similar users' ratings
# rather than scanning every similar user once per candidate movie. Users are
# visited in the same order as before, so each per-movie sum is built from the
# same additions in the same order.
weighted_rating_sums = dict()  # movie_id -> sum of rating * similarity
similarity_sums = dict()       # movie_id -> sum of similarity
for user_id, user_sim, _ in sim_users:
    for movie_id, user_rating in users_matrix[user_id].items():
        if movie_id not in movies_to_recommend:
            continue
        weighted_rating_sums[movie_id] = (
            weighted_rating_sums.get(movie_id, 0) + user_rating * user_sim
        )
        similarity_sums[movie_id] = similarity_sums.get(movie_id, 0) + user_sim

# Every candidate movie was rated by at least one similar user, so both
# dictionaries have an entry for it and the similarity sum is positive.
recommended_movies = []
for movie_id in movies_to_recommend:
    recommended_rating = weighted_rating_sums[movie_id] / similarity_sums[movie_id]
    insert_sorted(recommended_movies, (movie_id, round(recommended_rating, 1), 0))

In [179]:
# Show up to five recommendations, best predicted rating first, as
# "<position> - <movie name> <predicted rating>".
for position, (movie_id, rating, _) in enumerate(recommended_movies[:5], start=1):
    print('{} - {} {}'.format(position, matrix[movie_id]['name'], rating))