In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

# Load the ratings table that NNSrecommend reads
# ("userId", "movieId" and "rating" columns are used below).
ratings = pd.read_csv(filepath_or_buffer='ratings.csv')

In [2]:
def NNSrecommend(inputuser = 1, data = None, minrating = 4):
    
    """Suggest movies based on a nearest-neighbour search (NNS) over users.
    
    The nearest neighbour is the user with the smallest Euclidean distance to
    "inputuser", measured over the ratings of the movies "inputuser" has rated.
    Only users that have rated every one of those movies are considered.
    
    Input:
    inputuser       -- "userId" of the selected user
    data            -- ratings table with "userId", "movieId" and "rating"
                       columns; defaults to the module-level "ratings" frame
    minrating       -- lowest rating given by the nearest user that still
                       counts as a recommendation (default 4)
    
    Output:
    recommendations -- "movieId"-s of recommended movies, numpy.ndarray format
    
    Error codes:
    -1              -- "inputuser" not found in database
    -2              -- There are no users, that have rated the same movies
    """
    
    if data is None:
        # Backward-compatible default: fall back to the notebook-level table
        data = ratings
    
    if not (data['userId'] == inputuser).any():
        return -1

    # Reduction of movies: keep only ratings of movies the input user has rated
    seenmovies = data.loc[data['userId'] == inputuser, 'movieId']
    smallratings = data[data['movieId'].isin(seenmovies)]

    # Reduction of users: keep only users that rated every one of those movies.
    # Distinct movies are counted so that a repeated (userId, movieId) row
    # cannot throw the comparison off.
    moviecounts = smallratings.groupby('userId')['movieId'].nunique()
    users = moviecounts.index[moviecounts == seenmovies.nunique()]
    smallratings = smallratings[smallratings['userId'].isin(users)]
        
    # Make pivot table (rows: users, columns: seen movies)
    table = pd.pivot_table(smallratings, values='rating', index='userId', columns='movieId')

    others = table.drop(index = inputuser)
    
    if others.empty:
        return -2
    
    # Only the distances from the input user are needed, so compute that single
    # row instead of the full user-by-user distance matrix. Squared Euclidean
    # distance has the same ordering as the Euclidean distance itself.
    # skipna=False keeps a missing rating as a NaN distance (as pdist does),
    # which idxmin then skips.
    sqdistances = ((others - table.loc[inputuser]) ** 2).sum(axis = 1, skipna = False)
    nearestuser = sqdistances.idxmin()

    # Movies the nearest user rated highly and the input user has not rated yet
    selected = ((data['userId'] == nearestuser)
                & ~data['movieId'].isin(seenmovies)
                & (data['rating'] >= minrating))
    recommendations = data.loc[selected, 'movieId'].values
    
    return recommendations

In [3]:
# Run the recommender for the user whose "userId" is 1
rec = NNSrecommend(inputuser = 1)

In [4]:
# Display the value returned by NNSrecommend(1) in the previous cell
rec

array([     3,     14,     21, ..., 145839, 146656, 148626], dtype=int64)