In [None]:
from flask import Flask,render_template,request
    # Import libraries
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from timeit import default_timer
from sklearn.neighbors import KNeighborsClassifier

app = Flask(__name__)

def predict_movies(predictions, userID, movies, original_ratings, num_recommendations):
    """
    Recommend the not-yet-rated movies with the highest predicted rating.

    :param predictions: DataFrame of predicted ratings (e.g. the SVD
        reconstructed matrix). Row ``userID - 1`` (by position) holds the
        user's predictions and the columns are movie ids.
    :param userID: 1-based id of the user to recommend for.
    :param movies: DataFrame with movie data; must contain ``movieId``.
    :param original_ratings: DataFrame of the original ratings with
        ``userId``, ``movieId`` and ``rating`` columns.
    :param num_recommendations: number of recommendations to be returned.
    :return: tuple ``(user_full, recommendations)`` where ``user_full`` is the
        user's own ratings joined with the movie data (best rated first) and
        ``recommendations`` holds up to ``num_recommendations`` rows of
        ``movies`` ordered by predicted rating, highest first.
    :raises IndexError: if ``userID`` does not correspond to a row of
        ``predictions``.
    """
    # User IDs start at 1, row positions at 0.
    user_row_number = userID - 1
    # Reject ids below 1 explicitly: a negative position would make ``iloc``
    # count from the end and silently return another user's predictions.
    if not 0 <= user_row_number < len(predictions):
        raise IndexError(
            'userID {0} is outside the range 1..{1}'.format(userID, len(predictions))
        )

    # Name the column directly instead of renaming it afterwards, so the
    # result does not depend on how ``predictions`` happens to be indexed.
    user_predictions = (
        predictions.iloc[user_row_number]
        .sort_values(ascending=False)
        .rename('Predictions')
        .rename_axis('movieId')
        .reset_index()
    )

    # Get the user's data and merge in the movie information.
    user_data = original_ratings[original_ratings.userId == userID]
    user_full = (
        user_data.merge(movies, how='left', on='movieId')
        .sort_values(['rating'], ascending=False)
    )

    print('User {0} has already rated {1} movies.'.format(userID, user_full.shape[0]))
    print('Recommending highest {0} predicted ratings movies not already rated.'.format(num_recommendations))

    # Recommend the highest predicted rating movies that the user hasn't
    # rated yet; the helper 'Predictions' column is dropped from the result.
    not_rated = movies[~movies['movieId'].isin(user_full['movieId'])]
    recommendations = (
        not_rated.merge(user_predictions, how='left', on='movieId')
        .sort_values('Predictions', ascending=False)
        .head(num_recommendations)
        .drop(columns='Predictions')
    )

    return user_full, recommendations


def get_recommendations_based_on_genres(movie_title, cosine_sim_movies, df_movies):
    """
    Calculates top 2 movies to recommend based on given movie titles genres.

    The title is matched exactly first and, if that fails, case-insensitively,
    so "othello (1995)" still finds "Othello (1995)".

    :param movie_title: title of movie to be taken for base of recommendation
    :param cosine_sim_movies: square matrix of cosine similarity between
        movies, with rows/columns in the same order as the rows of df_movies
    :param df_movies: dataframe containing movie information (``title`` column)
    :return: Titles of movies recommended to user as a comma-separated string,
        or "Error: Movie not found" if the movie doesn't exist
    """
    titles = df_movies['title']

    # Work with row *positions* so the lookup into the similarity matrix is
    # correct even if df_movies does not carry a default 0..n-1 index.
    matches = (titles == movie_title).to_numpy().nonzero()[0]
    if matches.size == 0:
        matches = (titles.str.lower() == movie_title.lower()).to_numpy().nonzero()[0]
    if matches.size == 0:
        return "Error: Movie not found"

    position = int(matches[0])
    scores = cosine_sim_movies[position]

    # Exclude the query movie explicitly: movies with identical genres tie
    # with it at the top score, so it is not guaranteed to sort first.
    candidates = (i for i in range(len(scores)) if i != position)
    best = sorted(candidates, key=lambda i: scores[i], reverse=True)[:2]
    return ', '.join(titles.iloc[best].values)








@app.route("/")
def home():
    """Render the ``index.html`` template for the site root."""
    landing_page = render_template('index.html')
    return landing_page
def _genre_similarity(movies):
    """Return the pairwise genre similarity matrix for the rows of ``movies``."""
    # Missing genres and the "(no genres listed)" placeholder both become an
    # empty document so they contribute no genre tokens.
    genres = movies['genres'].fillna("").replace(to_replace="(no genres listed)", value="")
    vectorizer = TfidfVectorizer(token_pattern=r'[a-zA-Z0-9\-]+')
    tfidf_matrix = vectorizer.fit_transform(genres)
    return linear_kernel(tfidf_matrix, tfidf_matrix)


def _predict_all_ratings(ratings, k=50):
    """Return a users x movies DataFrame of ratings reconstructed by a rank-k SVD."""
    rating_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    R = rating_matrix.to_numpy()
    # Column vector of per-user means so it broadcasts across each user's row.
    user_ratings_mean = np.mean(R, axis=1).reshape(-1, 1)
    U, sigma, Vt = svds(R - user_ratings_mean, k=k)
    # ``U * sigma`` scales the columns of U, i.e. U @ diag(sigma).
    all_user_predicted_ratings = np.dot(U * sigma, Vt) + user_ratings_mean
    return pd.DataFrame(all_user_predicted_ratings, columns=rating_matrix.columns)


@app.route('/', methods=['POST'])
def text_display():
    """
    Handle the recommendation form posted to the site root.

    Form field ``text2`` (a movie title) takes precedence and yields
    genre-based recommendations passed to the template as ``recommend``.
    Otherwise form field ``text`` is read as a user id and the top 20
    predicted titles are passed as ``titles``; an invalid or unknown id
    passes an error message as ``titles`` instead. With neither field set the
    plain page is rendered.

    :return: the rendered ``index.html`` template
    """
    movieinput = request.form.get('text2')
    text = request.form.get('text', '')
    if not movieinput and not text:
        return render_template('index.html')

    # Reading movies file
    movies = pd.read_csv('movie.csv', nrows=1000000)

    if movieinput:
        print(movieinput)
        cosine_sim_movies = _genre_similarity(movies)
        recommend = get_recommendations_based_on_genres(movieinput, cosine_sim_movies, movies)
        return render_template('index.html', recommend=recommend)

    invalid_user = "wrong value entered,the user does not exists"

    # Validate the id before doing any expensive work. Ids below 1 are
    # rejected here because predict_movies maps userID to row ``userID - 1``.
    try:
        user_id = int(text)
    except ValueError:
        return render_template('index.html', titles=invalid_user)
    if user_id < 1:
        return render_template('index.html', titles=invalid_user)

    # Reading ratings file (only needed for the per-user predictions)
    ratings = pd.read_csv('rating.csv', nrows=1000000)

    try:
        # Split "Title (1995)" into a 'year' column and a clean title.
        # regex=True is required: newer pandas treats the pattern literally
        # by default, which would leave the year in the title.
        movies['year'] = movies['title'].str.extract(r'\((\d\d\d\d)\)', expand=False)
        movies['title'] = (
            movies['title'].str.replace(r'(\(\d\d\d\d\))', '', regex=True).str.strip()
        )

        preds = _predict_all_ratings(ratings)
        _, predictions = predict_movies(preds, user_id, movies, ratings, 20)
    except (ValueError, IndexError):
        # IndexError: the id is larger than the number of users in the data.
        return render_template('index.html', titles=invalid_user)

    titles = predictions['title'][:20]
    return render_template('index.html', titles=titles)

# Start the Flask server only when this file is executed directly.
if __name__ == "__main__":
    app.run()


 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [06/May/2023 17:33:40] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [06/May/2023 17:33:40] "GET /static/css/swiper.min.css HTTP/1.1" 404 -
127.0.0.1 - - [06/May/2023 17:33:40] "GET / HTTP/1.1" 200 -


Othello (1995)


127.0.0.1 - - [06/May/2023 17:34:03] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [06/May/2023 17:34:03] "GET /static/css/swiper.min.css HTTP/1.1" 404 -
127.0.0.1 - - [06/May/2023 17:34:03] "GET / HTTP/1.1" 200 -


pokemon


127.0.0.1 - - [06/May/2023 17:35:08] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [06/May/2023 17:35:08] "GET /static/css/swiper.min.css HTTP/1.1" 404 -
127.0.0.1 - - [06/May/2023 17:35:08] "GET / HTTP/1.1" 200 -
  movies['title'] = movies.title.str.replace('(\(\d\d\d\d\))', '')


User 150 has already rated 26 movies.
Recommending highest 20 predicted ratings movies not already rated.


127.0.0.1 - - [06/May/2023 17:36:36] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [06/May/2023 17:36:36] "GET /static/css/swiper.min.css HTTP/1.1" 404 -
127.0.0.1 - - [06/May/2023 17:36:36] "GET / HTTP/1.1" 200 -


In [None]:
def get_recommendations_based_on_genres(movie_title, cosine_sim_movies,df_movies):
    """
    Calculates top 2 movies to recommend based on given movie titles genres.

    An exact title match is preferred; if there is none the title is matched
    case-insensitively.

    :param movie_title: title of movie to be taken for base of recommendation
    :param cosine_sim_movies: cosine similarity between movies, with rows in
        the same order as the rows of df_movies
    :param df_movies: dataframe containing movie information (``title`` column)
    :return: Titles of movies recommended to user as a string, or
        "Error: Movie not found" when no movie has that title
    """
    all_titles = df_movies['title']

    # Row positions (not index labels) are what the similarity matrix needs.
    found = (all_titles == movie_title).to_numpy().nonzero()[0]
    if found.size == 0:
        found = (all_titles.str.lower() == movie_title.lower()).to_numpy().nonzero()[0]
    if found.size == 0:
        # Previously an unknown title crashed with IndexError on the lookup.
        return "Error: Movie not found"

    idx_movie = int(found[0])
    sim_row = cosine_sim_movies[idx_movie]

    # Drop the query movie by position rather than assuming it sorts first:
    # movies with the same genres share its top similarity score.
    others = [i for i in range(len(sim_row)) if i != idx_movie]
    others.sort(key=lambda i: sim_row[i], reverse=True)
    movie_indices = others[:2]
    return ', '.join(all_titles.iloc[movie_indices].values)


In [None]:
def text_display():
    """
    Build recommendations from the submitted form.

    If form field ``text2`` holds a movie title, genre-based recommendations
    are rendered as ``recommend``. Otherwise form field ``text`` is read as a
    user id and the top 20 predicted titles are rendered as ``titles``; when
    the id is not a usable integer the message "wrong value entered" is
    rendered as ``titles`` instead.

    :return: the rendered ``index.html`` template
    """
    # Reading movies file
    movies = pd.read_csv('movie.csv', nrows=1000000)

    # ``.get`` instead of ``[...]``: a form that omits one of the two fields
    # should fall through to the other path rather than fail the request.
    movieinput = request.form.get('text2', '')
    print(movieinput)
    if movieinput != "":
        # Missing genres and the "(no genres listed)" placeholder both become
        # an empty document for the vectorizer.
        genres = movies['genres'].fillna("").replace(to_replace="(no genres listed)", value="")
        tfidf_movies_genres = TfidfVectorizer(token_pattern=r'[a-zA-Z0-9\-]+')
        tfidf_movies_genres_matrix = tfidf_movies_genres.fit_transform(genres)
        cosine_sim_movies = linear_kernel(tfidf_movies_genres_matrix, tfidf_movies_genres_matrix)
        recommend = get_recommendations_based_on_genres(movieinput, cosine_sim_movies, movies)
        return render_template('index.html', recommend=recommend)

    # Parse the user id before the expensive matrix factorisation.
    try:
        user_id = int(request.form.get('text', ''))
    except ValueError:
        print("The string could not be converted to an integer.")
        return render_template('index.html', titles="wrong value entered")
    if user_id < 1:
        # predict_movies maps userID to row ``userID - 1``; ids below 1 would
        # address rows from the end of the prediction matrix.
        return render_template('index.html', titles="wrong value entered")

    # Reading ratings file
    ratings = pd.read_csv('rating.csv', nrows=1000000)

    # Move the "(1995)" suffix of each title into a separate 'year' column.
    # regex=True is required: newer pandas treats the pattern literally by
    # default, which would leave the year in the title.
    movies['year'] = movies['title'].str.extract(r'\((\d\d\d\d)\)', expand=False)
    movies['title'] = movies['title'].str.replace(r'(\(\d\d\d\d\))', '', regex=True).str.strip()

    # User x movie rating matrix, de-meaned per user, factorised with a
    # rank-50 SVD and reconstructed into predicted ratings.
    Ratings = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    R = Ratings.to_numpy()
    user_ratings_mean = np.mean(R, axis=1).reshape(-1, 1)  # column vector, one mean per user
    U, sigma, Vt = svds(R - user_ratings_mean, k=50)
    all_user_predicted_ratings = np.dot(np.dot(U, np.diag(sigma)), Vt) + user_ratings_mean
    preds = pd.DataFrame(all_user_predicted_ratings, columns=Ratings.columns)

    try:
        # NOTE(review): the original called ``recommend_movies``, which is not
        # defined in this file; ``predict_movies`` has the matching signature.
        already_rated, predictions = predict_movies(preds, user_id, movies, ratings, 20)
    except (ValueError, IndexError):
        # IndexError: the id is larger than the number of users in the data.
        return render_template('index.html', titles="wrong value entered")

    titles = predictions['title'][:20]
    return render_template('index.html', titles=titles)
