In [1]:
import joblib
import os
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
import numpy as np

In [None]:
# Artifact locations. The literals below are machine-specific defaults; set the
# RECSYS_MODELS_DIR / RECSYS_DATA_DIR_PROCESSED environment variables to run this
# notebook elsewhere without editing the cell. With the variables unset, the
# resulting paths are identical to the previously hard-coded ones.
MODELS_DIR = os.environ.get(
    'RECSYS_MODELS_DIR',
    'C:\\Md Shahid\\Liabilities\\Machine Learning Projects\\Recommendation System\\models\\',
)
DATA_DIR_PROCESSED = os.environ.get(
    'RECSYS_DATA_DIR_PROCESSED',
    'C:\\Md Shahid\\Liabilities\\Machine Learning Projects\\Recommendation System\\data\\processed\\',
)

# Input artifacts read by the loading cell, all resolved inside MODELS_DIR.
SPARSE_MATRIX_PATH = os.path.join(MODELS_DIR, 'movie_features_matrix.joblib')
INDEX_MAP_PATH = os.path.join(MODELS_DIR, 'movie_index_map.pkl')
LOOKUP_PATH = os.path.join(MODELS_DIR, 'final_movie_lookup_df.pkl')

In [9]:
# NOTE: joblib.load and pd.read_pickle unpickle arbitrary Python objects. Only load
# artifacts written by this project's own pipeline, never files from untrusted sources.
movie_features_matrix = joblib.load(SPARSE_MATRIX_PATH)
movie_titles = joblib.load(INDEX_MAP_PATH)
final_movie_lookup_df = pd.read_pickle(LOOKUP_PATH)

# The recommendation function maps a title's position in movie_titles to the same
# row of the matrix, so the two must be the same length. Fail here, loudly, rather
# than return mislabelled neighbours (or an IndexError) later.
if movie_features_matrix.shape[0] != len(movie_titles):
    raise ValueError(
        f"Matrix has {movie_features_matrix.shape[0]} rows but the title index "
        f"has {len(movie_titles)} entries; the artifacts are out of sync."
    )

print(f"Sparse matrix loaded. Shape: {movie_features_matrix.shape}")
print(f"Number of indexed movies (rows in matrix): {len(movie_titles)}")

Sparse matrix loaded. Shape: (8754, 671)
Number of indexed movies (rows in matrix): 8754


In [10]:
print("\n--- 2. Training the K-NN Model ---")

# Build and fit the neighbour index in one expression (fit() returns the estimator).
#   algorithm='brute' : exhaustive comparison against every fitted row
#   metric='cosine'   : neighbours are ranked by cosine distance between rows
#   n_jobs=-1         : use all available cores for neighbour queries
#   n_neighbors=20    : default neighbour count when a query does not pass its own
# Each row of the matrix is one movie; the columns are presumably per-user
# ratings -- TODO confirm against the preprocessing notebook.
knn_model = NearestNeighbors(
    algorithm='brute',
    metric='cosine',
    n_jobs=-1,
    n_neighbors=20,
).fit(movie_features_matrix)

print("NearestNeighbors Model training complete.")


--- 2. Training the K-NN Model ---
NearestNeighbors Model training complete.


In [11]:
def get_recommendations_by_title(model, matrix, titles_list, movie_title, k=10):
    """
    Find the k movies most similar to a given movie title.

    Args:
        model: Fitted neighbour-search object exposing
            ``kneighbors(X, n_neighbors=...)`` that returns ``(distances, indices)``.
        matrix: Feature matrix in which row ``i`` belongs to ``titles_list[i]``.
        titles_list: Sequence of titles supporting ``.index()`` and integer indexing.
        movie_title: Exact title to look up. If the title occurs more than once,
            the first occurrence is used.
        k: Maximum number of recommendations to return.

    Returns:
        A list of at most ``k`` dicts ``{'title': ..., 'similarity': ...}`` in the
        order the model returned them, where ``similarity`` is ``1 - distance``.
        If ``movie_title`` is not in ``titles_list``, an error message string is
        returned instead (callers distinguish the two cases by type).

    Raises:
        ValueError: Propagated unchanged from ``model.kneighbors`` (for example
            when ``k + 1`` exceeds the number of fitted samples). Such errors are
            no longer reported as "title not found".
    """
    # Guard only the lookup: a ValueError raised further down is a different
    # problem and must not be mislabelled as a missing title.
    try:
        query_index = titles_list.index(movie_title)
    except ValueError:
        return f"Error: Movie title '{movie_title}' not found in the dataset."

    # Row of the matrix corresponding to the query movie
    query_movie_vector = matrix[query_index]

    # Request one extra neighbour because the query movie is normally among its
    # own nearest neighbours and has to be removed from the result.
    distances, indices = model.kneighbors(query_movie_vector, n_neighbors=k + 1)

    recommendations = []
    for distance, neighbor_index in zip(distances.flatten(), indices.flatten()):
        # Drop the query by row index, not by position: with tied distances
        # (e.g. duplicate rows) or an all-zero query row, the query is not
        # guaranteed to be the first neighbour returned.
        if neighbor_index == query_index:
            continue
        recommendations.append({
            'title': titles_list[neighbor_index],
            # Convert distance to similarity (1=identical, 0=no similarity)
            'similarity': float(1 - distance),
        })

    # If the query was absent from the k + 1 neighbours, trim the surplus entry.
    return recommendations[:k]


# --- 4. Test the Model ---
# Title used as the query for a quick sanity check of the fitted model.
TEST_MOVIE = 'Toy Story'

print(f"\n--- 4. Test Predictions for: {TEST_MOVIE} ---")

recommendations = get_recommendations_by_title(
    model=knn_model,
    matrix=movie_features_matrix,
    titles_list=movie_titles,
    movie_title=TEST_MOVIE,
    k=10,
)

# The helper returns a plain message string when the title is unknown and a
# list of {'title', 'similarity'} dicts otherwise; handle the message first.
if not isinstance(recommendations, list):
    print(recommendations)
else:
    for rec in recommendations:
        print(f"- {rec['title']} (Similarity: {rec['similarity']:.4f})")


--- 4. Test Predictions for: Toy Story ---
- Toy Story 2 (Similarity: 0.5947)
- Star Wars (Similarity: 0.5762)
- Forrest Gump (Similarity: 0.5645)
- Independence Day (Similarity: 0.5629)
- Groundhog Day (Similarity: 0.5480)
- Back to the Future (Similarity: 0.5367)
- Jurassic Park (Similarity: 0.5352)
- Shrek (Similarity: 0.5327)
- Return of the Jedi (Similarity: 0.5293)
- Pulp Fiction (Similarity: 0.5269)


In [12]:
# Persist the fitted neighbour model next to the other artifacts in MODELS_DIR
# so it can be reloaded later with joblib.load without refitting.
MODEL_PATH = os.path.join(MODELS_DIR, 'knn_recommender_model.joblib')

joblib.dump(value=knn_model, filename=MODEL_PATH)

print(f"\nFinal K-NN Model saved to: {MODEL_PATH}")


Final K-NN Model saved to: C:\Md Shahid\Liabilities\Machine Learning Projects\Recommendation System\models\knn_recommender_model.joblib
