In [None]:
import os

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [None]:
# Location of the interaction CSV. The INTERACTION_DATA_CSV environment variable
# overrides it so the notebook can run on other machines; when the variable is
# unset the original local path is used unchanged.
file_path = os.environ.get(
    'INTERACTION_DATA_CSV',
    'D:\\Webdev\\Projects\\NavShiksha\\BackendRecSys\\interaction_data.csv',
)
dataset = pd.read_csv(file_path)

In [None]:
# Print a concise summary of the loaded frame (columns, dtypes, non-null counts).
dataset.info()

In [None]:
# Keep only the first row seen for each video_title, then renumber the index
# from 0 so that row labels and row positions coincide.
dataset = (
    dataset
    .drop_duplicates(subset='video_title', keep='first')
    .reset_index(drop=True)
)


In [None]:
# Preview the first five rows.
dataset.head(5)

In [None]:
# Preview the last five rows.
dataset.tail(5)

In [None]:
# Combine title, description and tags into one text field for vectorisation.
# Missing values are replaced by '' first: with plain `+`, a single NaN would
# turn the whole row's content into NaN. The fields are joined with spaces so
# the last word of one field does not fuse with the first word of the next.
dataset['content'] = (
    dataset['video_title'].fillna('').astype(str)
    + ' ' + dataset['video_description'].fillna('').astype(str)
    + ' ' + dataset['video_tags'].fillna('').astype(str)
).str.strip()


In [None]:
# TF-IDF vectorizer configured to drop scikit-learn's built-in English stop words.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from dataset['content'] and build the document-term
# matrix in one call; each row corresponds to one entry of that column.
tfidf_matrix = tfidf_vectorizer.fit_transform(dataset['content'])

In [None]:
# Brute-force cosine-distance neighbour index over the TF-IDF rows.
# The default of 10 neighbours leaves headroom for handling duplicates.
nn_model = NearestNeighbors(
    n_neighbors=10,
    algorithm='brute',
    metric='cosine',
)
nn_model.fit(tfidf_matrix)

In [None]:
def recommend_videos(video_title, num_recommendations=5):
    """Return up to ``num_recommendations`` videos most similar to ``video_title``.

    The query row of the module-level ``tfidf_matrix`` is passed to
    ``nn_model.kneighbors``; the returned row positions are mapped back to
    ``dataset`` to collect titles and links.

    Args:
        video_title: Exact title to look up in ``dataset['video_title']``.
        num_recommendations: Maximum number of results to return.

    Returns:
        A list of ``{'video_title': ..., 'video_link': ...}`` dicts, or a plain
        message string when the title is not found or no recommendation is left
        after filtering.
    """
    # Look the title up by row position (not index label), because the result
    # is used to index tfidf_matrix and dataset.iloc below.
    positions = (dataset['video_title'] == video_title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        return "Video title not found in the dataset."
    idx = int(positions[0])

    # A non-positive request can never yield results; head() with a negative
    # argument would otherwise return "all but the last rows".
    if num_recommendations <= 0:
        return "No sufficient recommendations available."

    # Ask for the query itself plus the wanted results, never fewer than the
    # previous fixed 10 and never more than were fitted (kneighbors raises if
    # n_neighbors exceeds the number of fitted samples).
    n_fitted = tfidf_matrix.shape[0]
    n_query = min(max(num_recommendations + 1, 10), n_fitted)
    _, indices = nn_model.kneighbors(tfidf_matrix[idx], n_neighbors=n_query)

    # Remove the query by position rather than assuming it comes back first:
    # rows with identical content tie on distance and may be ordered ahead of it.
    neighbour_positions = [int(pos) for pos in indices.flatten() if int(pos) != idx]

    recommended_videos = (
        dataset.iloc[neighbour_positions][['video_title', 'video_link']]
        .drop_duplicates()
        .head(num_recommendations)
    )

    if recommended_videos.empty:
        return "No sufficient recommendations available."

    return recommended_videos.to_dict(orient='records')

In [None]:
# Try the recommender on the title stored at positional row 69.
# NOTE(review): .iloc[69] raises IndexError when the frame has fewer than 70 rows.
example_title = dataset['video_title'].iloc[69]  
recommendations = recommend_videos(example_title, num_recommendations=5)

In [None]:
# recommend_videos returns a plain string (not a list) when the title is not
# found or nothing can be recommended. Iterating that string would yield single
# characters and rec['video_title'] would raise TypeError, so the message is
# printed as-is in that case.
if isinstance(recommendations, str):
    print(recommendations)
else:
    print("Recommendations:")
    for rank, rec in enumerate(recommendations, 1):
        print(f"{rank}. {rec['video_title']} - {rec['video_link']}")

In [None]:
!pip install joblib

In [None]:
import joblib

# Persist both fitted objects to the current working directory, model first.
for artifact, target in (
    (nn_model, 'nearest_neighbors_model.pkl'),
    (tfidf_vectorizer, 'tfidf_vectorizer.pkl'),
):
    joblib.dump(artifact, target)

print("Model and vectorizer saved successfully!")