<a href="https://colab.research.google.com/github/Morris136/Homework-/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import sys
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# CSV file with the movie dataset; load_dataset() requires 'title' and 'plot' columns.
DATASET_PATH = 'movie_plots.csv'
# NumPy .npy file where the plot embeddings are cached between runs.
EMBEDDINGS_PATH = 'movie_plots_embeddings.npy'

def load_dataset():
    """Read the movie dataset from ``DATASET_PATH`` into a DataFrame.

    The process is terminated with exit status 1 (after printing an
    explanatory message) when the file does not exist, or when either of the
    required ``title`` / ``plot`` columns is absent.

    Returns:
        pandas.DataFrame: the parsed CSV contents.
    """
    dataset_found = os.path.exists(DATASET_PATH)
    if not dataset_found:
        print(f"Dataset file '{DATASET_PATH}' not found. Please provide the dataset.")
        sys.exit(1)

    movies = pd.read_csv(DATASET_PATH)

    # Both columns must be present before the frame is handed back.
    has_required_columns = all(
        column in movies.columns for column in ('plot', 'title')
    )
    if not has_required_columns:
        print("Dataset must contain 'title' and 'plot' columns.")
        sys.exit(1)

    return movies

def load_or_create_embeddings(df, model):
    """Return one embedding row per dataset row, reusing the disk cache if valid.

    The cache at ``EMBEDDINGS_PATH`` is only used when it is a 2-D array with
    exactly ``len(df)`` rows; otherwise it is considered stale (the dataset
    changed since it was written) and the embeddings are regenerated and the
    cache overwritten.

    Args:
        df: DataFrame containing a ``plot`` column.
        model: Object exposing ``encode(texts, show_progress_bar=...)`` that
            returns one vector per input text.

    Returns:
        numpy.ndarray: the embeddings, row ``i`` corresponding to ``df`` row ``i``.
    """
    if os.path.exists(EMBEDDINGS_PATH):
        print(f"Loading precomputed embeddings from '{EMBEDDINGS_PATH}'...")
        embeddings = np.load(EMBEDDINGS_PATH)
        # Row i of the cache is later paired with df.iloc[i]; a row-count
        # mismatch would mislabel results or raise IndexError downstream.
        if embeddings.ndim == 2 and embeddings.shape[0] == len(df):
            return embeddings
        print(
            f"Cached embeddings do not match the dataset "
            f"({len(df)} rows expected); regenerating..."
        )

    print("Generating embeddings for dataset...")
    # pandas reads empty CSV cells as float NaN; coerce so the encoder only
    # ever receives strings.
    plots = df['plot'].fillna('').astype(str).tolist()
    embeddings = np.array(model.encode(plots, show_progress_bar=True))
    np.save(EMBEDDINGS_PATH, embeddings)
    print(f"Embeddings saved to '{EMBEDDINGS_PATH}'.")
    return embeddings

def find_top_k_similar(embedding, embeddings_matrix, df, top_k=5):
    """Rank dataset rows by cosine similarity to a query embedding.

    Args:
        embedding: 1-D query vector (reshaped to a single row internally).
        embeddings_matrix: 2-D array whose row ``i`` belongs to ``df`` row ``i``.
        df: DataFrame with a ``title`` column.
        top_k: Maximum number of results to return. Values <= 0 yield an
            empty list.

    Returns:
        list[tuple]: up to ``top_k`` ``(title, similarity)`` pairs, most
        similar first.
    """
    # Guard explicitly: with top_k == 0 the slice [-0:] would select *every*
    # row, and a negative top_k would select an arbitrary tail.
    if top_k <= 0 or len(embeddings_matrix) == 0:
        return []

    query = embedding.reshape(1, -1)
    similarities = cosine_similarity(query, embeddings_matrix)[0]

    # Descending order first, then truncate; slicing past the end is safe
    # when fewer than top_k rows exist.
    top_k_idx = similarities.argsort()[::-1][:top_k]

    # Select the column once instead of materialising a full row per hit.
    titles = df['title']
    return [(titles.iloc[i], similarities[i]) for i in top_k_idx]

def main():
    """Run the interactive movie-recommendation prompt.

    Loads the dataset and the sentence-embedding model, obtains the plot
    embeddings, then repeatedly reads a free-text description from stdin and
    prints the most similar movie titles. The loop ends when the user types
    ``exit`` / ``quit``, or when input is closed or interrupted
    (Ctrl-D / Ctrl-C).
    """
    print("Loading dataset...")
    df = load_dataset()

    print("Loading embedding model...")
    model = SentenceTransformer('all-MiniLM-L6-v2')

    embeddings = load_or_create_embeddings(df, model)

    # Single source of truth for the result count used in the query and the
    # heading below.
    top_k = 5

    print("\n=== Movie Recommendation CLI ===")
    print("Enter a movie plot or description (or 'exit' to quit):")

    while True:
        try:
            user_input = input("\nYour input: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C should end the session cleanly instead
            # of dumping a traceback.
            print("\nExiting. Goodbye!")
            break

        if user_input.lower() in ('exit', 'quit'):
            print("Exiting. Goodbye!")
            break
        if len(user_input) < 5:
            print("Please enter a longer text for meaningful recommendations.")
            continue

        user_embedding = model.encode([user_input])[0]
        recommendations = find_top_k_similar(user_embedding, embeddings, df, top_k=top_k)

        print(f"\nTop {top_k} recommended movies based on your input:")
        for rank, (title, score) in enumerate(recommendations, 1):
            print(f"{rank}. {title} (similarity: {score:.4f})")

# Start the interactive CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
