In [12]:
# Build a small sample movie catalogue as CSV text: one header row followed by
# ten movies. Overviews are double-quoted because some of them contain commas.
data = """title,genres,overview
Avatar,Action Adventure Fantasy,"A marine on an alien planet becomes torn between following his orders and protecting the world he feels is his home."
Titanic,Drama Romance,"A seventeen-year-old aristocrat falls in love with a kind but poor artist aboard the luxurious, ill-fated R.M.S. Titanic."
The Avengers,Action Science Fiction,"Earth's mightiest heroes must come together to stop a mischievous god from enslaving humanity."
Interstellar,Adventure Drama Science Fiction,"A team of explorers travel through a wormhole in space in an attempt to ensure humanity's survival."
Inception,Action Adventure Science Fiction,"A thief who steals corporate secrets through dream-sharing technology is given a chance to erase his criminal history."
The Dark Knight,Action Crime Drama,"Batman sets out to dismantle the remaining criminal organizations that plague the streets."
The Notebook,Romance Drama,"A poor yet passionate young man falls in love with a rich young woman and gives her a sense of freedom."
Avengers: Age of Ultron,Action Adventure Science Fiction,"When Tony Stark and Bruce Banner try to jump-start a dormant peacekeeping program, things go awry."
The Martian,Adventure Drama Science Fiction,"An astronaut becomes stranded on Mars and must rely on his ingenuity to survive."
Gravity,Science Fiction Thriller,"A medical engineer and an astronaut work together to survive after an accident leaves them adrift in space."
"""

# Persist the text as movies.csv in the current working directory
# (any existing file of that name is overwritten).
with open("movies.csv", mode="w", encoding="utf-8") as csv_out:
    csv_out.write(data)

print("✅ movies.csv file created successfully!")


✅ movies.csv file created successfully!


In [13]:
!pip install pandas scikit-learn




In [14]:
import pandas as pd

# Load the catalogue that the earlier cell wrote to movies.csv. No visible cell
# defines `df`, so without this load the notebook fails on a fresh kernel.
df = pd.read_csv("movies.csv")

# Step 4: Replace missing genres/overviews with empty strings so the string
# concatenation below never produces NaN
df['genres'] = df['genres'].fillna('')
df['overview'] = df['overview'].fillna('')

# Step 5: Combine genres and overview into a single text column per movie
df['combined_features'] = df['genres'] + ' ' + df['overview']

# Step 6: Preview the combined text for the first few movies
df[['title', 'combined_features']].head()


Unnamed: 0,title,combined_features
0,Avatar,Action Adventure Fantasy A marine on an alien ...
1,Titanic,Drama Romance A seventeen-year-old aristocrat ...
2,The Avengers,Action Science Fiction Earth's mightiest heroe...
3,Interstellar,Adventure Drama Science Fiction A team of expl...
4,Inception,Action Adventure Science Fiction A thief who s...


In [15]:
# Step 7: Turn each movie's combined text into a word-count vector
# NOTE(review): CountVectorizer is not imported in any cell shown here; confirm
# an earlier cell provides it (presumably from sklearn.feature_extraction.text).
cv = CountVectorizer()
vectors = cv.fit_transform(raw_documents=df['combined_features'])

# Sanity check: one row per movie, one column per distinct token
print("Vector shape:", vectors.shape)



Vector shape: (10, 128)


In [16]:
# Step 8: Pairwise cosine similarity between all movie vectors
# NOTE(review): cosine_similarity is not imported in any cell shown here;
# confirm an earlier cell provides it (presumably from sklearn.metrics.pairwise).
similarity = cosine_similarity(X=vectors)

# Row 0 holds the scores of the first movie ("Avatar") against every movie
print(similarity[0])

[1.         0.04472136 0.05       0.08728716 0.21821789 0.14142136
 0.04264014 0.13416408 0.35777088 0.12792043]


In [18]:
# Step 9: Recommendation function
def recommend(movie_name, top_n=5):
    """Print the movies most similar to ``movie_name``.

    The title is looked up in the global ``df`` (case-insensitively, ignoring
    surrounding whitespace). The other rows are then ranked by their score in
    the matching row of the global ``similarity`` matrix.

    Parameters
    ----------
    movie_name : str
        Title to look up.
    top_n : int, default 5
        Maximum number of recommendations to print.

    Returns
    -------
    None
        The recommendations, or a not-found message, are printed.
    """
    query = movie_name.strip().lower()

    # Locate the first row whose title matches; non-string titles (e.g. NaN)
    # are skipped instead of raising on `.lower()`.
    index = None
    for position, title in enumerate(df['title']):
        if isinstance(title, str) and title.strip().lower() == query:
            index = position
            break

    if index is None:
        print("❌ Movie not found in database!")
        return

    # Rank every *other* movie by similarity, highest first. The queried movie
    # is excluded by position: dropping "the first sorted entry" is wrong when
    # another row ties with it (sorted() is stable, so a tied row with a lower
    # position would be dropped instead and the movie would recommend itself).
    ranked = sorted(
        (
            (position, score)
            for position, score in enumerate(similarity[index])
            if position != index
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    print(f"\n🎬 Because you watched '{df.iloc[index]['title']}', you may also like:")
    for position, _score in ranked[:max(top_n, 0)]:
        print("👉", df.iloc[position]['title'])


In [19]:
# Example: recommendations for a title that exists in the catalogue
recommend("Avatar")



🎬 Because you watched 'Avatar', you may also like:
👉 The Martian
👉 Inception
👉 The Dark Knight
👉 Avengers: Age of Ultron
👉 Gravity


In [20]:
# Interactive lookup: read a title from the user and print its recommendations
user_movie = input("🎥 Enter a movie name: ")
recommend(movie_name=user_movie)


🎥 Enter a movie name:  hum tum


❌ Movie not found in database!
