<a href="https://colab.research.google.com/github/ChitrarthVasdev/CODSOFT2/blob/main/movierecommend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

TASK 4 : RECOMMENDATION SYSTEM (MOVIES)

DOMAIN : ARTIFICIAL INTELLIGENCE

LANGUAGE : PYTHON

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

movies_df = pd.read_csv('/content/imdb_top_1000.csv')
print(movies_df.head())


                                         Poster_Link  \
0  https://m.media-amazon.com/images/M/MV5BMDFkYT...   
1  https://m.media-amazon.com/images/M/MV5BM2MyNj...   
2  https://m.media-amazon.com/images/M/MV5BMTMxNT...   
3  https://m.media-amazon.com/images/M/MV5BMWMwMG...   
4  https://m.media-amazon.com/images/M/MV5BMWU4N2...   

               Series_Title Released_Year Certificate  Runtime  \
0  The Shawshank Redemption          1994           A  142 min   
1             The Godfather          1972           A  175 min   
2           The Dark Knight          2008          UA  152 min   
3    The Godfather: Part II          1974           A  202 min   
4              12 Angry Men          1957           U   96 min   

                  Genre  IMDB_Rating  \
0                 Drama          9.3   
1          Crime, Drama          9.2   
2  Action, Crime, Drama          9.0   
3          Crime, Drama          9.0   
4          Crime, Drama          9.0   

                         

In [10]:
# Keep and clean relevant columns
movies = movies_df[['Series_Title', 'Genre']].copy()
movies.columns = ['title', 'genres']
movies.dropna(subset=['genres'], inplace=True)

# Normalize titles for case-insensitive matching
movies['title_lower'] = movies['title'].str.lower()

# TF-IDF Vectorization on genres
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Title-to-index mapping (using lowercase titles)
indices = pd.Series(movies.index, index=movies['title_lower']).drop_duplicates()

# Recommendation function with case-insensitive input
def get_recommendations(title, top_n=10):
    title_lower = title.lower()
    if title_lower not in indices:
        raise ValueError(f"❌ Movie '{title}' not found in the dataset. Please check the spelling.")

    idx = indices[title_lower]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    movie_indices = [i[0] for i in sim_scores]
    return movies['title'].iloc[movie_indices].tolist()

# Main program with user input
if __name__ == "__main__":
    print("🎬 Welcome to the Movie Recommendation System!")
    try:
        user_input = input("Enter a movie title: ").strip()
        recommendations = get_recommendations(user_input)

        print(f"\nTop recommendations similar to '{user_input}':")
        for i, movie in enumerate(recommendations, 1):
            print(f"{i}. {movie}")
    except ValueError as e:
        print(e)


🎬 Welcome to the Movie Recommendation System!
Enter a movie title: joker

Top recommendations similar to 'joker':
1. Joker
2. The Departed
3. Drishyam
4. Reservoir Dogs
5. Eskiya
6. Heat
7. Du rififi chez les hommes
8. No Country for Old Men
9. Fargo
10. On the Waterfront
