In [6]:
# import Libraries
import html

import pandas as pd
from IPython.display import display, HTML
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 2: Telugu Movie Dataset (With image URLs)
# Each movie is written as one (title, description, image_url) row so that the
# three fields belonging to a film sit next to each other.
_COLUMNS = ('title', 'description', 'image_url')

_MOVIE_ROWS = [
    (
        'Bahubali',
        'An epic story of a lost prince who returns to claim his kingdom.',
        'https://upload.wikimedia.org/wikipedia/en/9/93/Baahubali_2_The_Conclusion_poster.jpg',
    ),
    (
        'Magadheera',
        'A warrior’s soul travels through time to protect his love.',
        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRH1Agl1xfrqTCAQtOz0trNy_zey_qNb2Mfhw&s',
    ),
    (
        'Arjun Reddy',
        'A brilliant surgeon struggles with heartbreak and addiction.',
        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSj7qS4HyAJMSoN7v-LN94Z9FRjihjoTVxkZQ&s',
    ),
    (
        'Jersey',
        'A failed cricketer attempts a comeback in his 30s for his son.',
        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTLlw8baHHCHmyep0kkb3QzWUqzeR22-4EY2A&s',
    ),
    (
        'Pushpa',
        'A red sandalwood smuggler rises in the world of crime.',
        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQq5IPZvHFhmkWBIUZe0RN_DTaKtFZ1tV3K_Q&s',
    ),
    (
        'Eega',
        'A man reincarnates as a housefly to take revenge.',
        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ9iYzApvIl9E02liC0jbhpUuxh-Vh2ZkZ7Pw&s',
    ),
    (
        'Ala vaikuntapurramlo',
        'A middle-class man discovers his real father is a billionaire.',
        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTPxE543KQv15EoJr_a5r8YYh5Z0ErVWnN9_g&s',
    ),
    (
        'Sita Ramam',
        'A beautiful love story between a soldier and a mysterious girl.',
        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQzfEbvajCJHnri0mt9diObUvxGkKQMu2ar4A&s',
    ),
]

# Transpose the rows into the column-oriented mapping: one list per column,
# keyed in the order given by _COLUMNS.
data = {
    column: list(values)
    for column, values in zip(_COLUMNS, zip(*_MOVIE_ROWS))
}

df = pd.DataFrame(data)

# Step 3: Text Vectorization using TF-IDF
# Turn every description into a TF-IDF weighted term vector; English stop
# words are dropped by the vectorizer before weighting.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['description'])

# Step 4: Cosine Similarity
# Called with a single matrix, cosine_similarity compares it against itself,
# giving a square matrix where entry [i][j] scores movie i against movie j.
similarity = cosine_similarity(tfidf_matrix)

# Step 5: Recommendation Function
def recommend(movie_title, top_n=5):
    """Recommend the movies whose descriptions are most similar to a given one.

    Reads the module-level ``df`` (movie table) and ``similarity`` (square
    matrix of pairwise description similarity, row/column order matching the
    row order of ``df``).

    Args:
        movie_title: Title to look up in ``df['title']``. An exact match is
            preferred; if there is none, the lookup is retried ignoring
            letter case and surrounding whitespace.
        top_n: Maximum number of recommendations to return. Values below
            zero are treated as zero.

    Returns:
        A "not found" message string when no title matches. Otherwise a
        tuple ``(selected_movie, result)`` where ``selected_movie`` is the
        matching row of ``df`` and ``result`` is a DataFrame holding the
        ``title`` and ``image_url`` columns of the recommended movies,
        ordered from most to least similar.
    """
    titles = df['title']

    hits = titles == movie_title
    if not hits.any() and isinstance(movie_title, str):
        # Fall back to a forgiving comparison so that "sita ramam " still
        # finds "Sita Ramam".
        wanted = movie_title.strip().casefold()
        hits = titles.str.strip().str.casefold() == wanted
    if not hits.any():
        return "❌ Movie not found in the database. Please try another Telugu movie."

    # Work with the row *position* rather than the index label: both the
    # similarity matrix and .iloc are positional, so this stays correct even
    # if df does not carry a default 0..n-1 index.
    position = int(hits.to_numpy().argmax())
    row_scores = similarity[position]

    # Leave the selected movie out explicitly instead of assuming it sorts
    # first; a tie or a zero self-similarity would otherwise let it appear
    # among its own recommendations. sorted() is stable, so equally scored
    # movies keep their dataset order.
    ranked_positions = sorted(
        (other for other in range(len(row_scores)) if other != position),
        key=lambda other: row_scores[other],
        reverse=True,
    )
    recommended_indices = ranked_positions[:max(top_n, 0)]

    selected_movie = df.iloc[position]
    result = df[['title', 'image_url']].iloc[recommended_indices]
    return selected_movie, result

# Step 6: Run Recommendation
# Trim surrounding whitespace so a stray space typed at the prompt cannot
# cause a lookup miss.
movie_to_search = input("🎬 Enter a Telugu movie name: ").strip()
output = recommend(movie_to_search)

# Step 7: Display Results with HTML
if isinstance(output, str):
    # recommend() reports an unknown title by returning a message string.
    print(output)
else:
    selected_movie, recommendations = output

    # Titles and URLs are plain text; escape them before placing them in the
    # markup so characters such as & < > " cannot corrupt the generated HTML.
    recommendation_items = ''.join(
        f'<li><a href="{html.escape(str(url))}" target="_blank">{html.escape(str(title))}</a></li>'
        for title, url in zip(recommendations['title'], recommendations['image_url'])
    )

    display(HTML(f"""
        <h2>🎬 Selected Movie: {html.escape(str(selected_movie['title']))}</h2>
        <img src="{html.escape(str(selected_movie['image_url']))}" width="1000"><br><br>
        <h3>📽 If you liked this movie, you might also enjoy:</h3>
        <ul>
            {recommendation_items}
        </ul>
    """))

🎬 Enter a Telugu movie name: Sita Ramam
