In [1]:
import os

import faiss
import mysql.connector
import numpy as np
from sentence_transformers import SentenceTransformer

# Connection to MySQL

In [4]:
# Connection settings are read from the environment so credentials do not
# have to be committed with the notebook.  The fallbacks reproduce the
# original local setup (root user, empty password, "movie" database), so the
# cell behaves as before when none of the variables are set.
conn = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", ""),
    database=os.environ.get("MYSQL_DATABASE", "movie"),
)
# One cursor is shared by the insert cell and by search_movies() further down.
cursor = conn.cursor()

# Load Sentence-BERT model


In [5]:
# Checkpoint name of the sentence-embedding model.  The same model instance is
# used both when storing movie vectors and when embedding search queries.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Load the movies with their genre descriptions

In [6]:
# Seed data: one (title, description) tuple per movie.  The description is a
# comma-separated list of genre labels; the insert cell below concatenates
# title and description to form the text that gets embedded.
movies = [
    ("Toy Story", "Adventure, Animation, Children, Comedy, Fantasy"),
    ("Jumanji", "Adventure, Children, Fantasy"),
    ("Grumpier Old Men", "Comedy, Romance"),
    ("Waiting to Exhale", "Comedy, Drama, Romance"),
    ("Father of the Bride Part II", "Comedy"),
    ("Heat", "Action, Crime, Thriller"),
    ("Sabrina", "Comedy, Romance"),
    ("Tom and Huck", "Adventure, Children"),
    ("Sudden Death", "Action"),
    ("GoldenEye", "Action, Adventure, Thriller"),
    ("The American President", "Comedy, Drama, Romance"),
    ("Dracula: Dead and Loving It", "Comedy, Horror"),
    ("Balto", "Adventure, Animation, Children"),
    ("Nixon", "Drama"),
    ("Cutthroat Island", "Action, Adventure, Romance"),
    ("Casino", "Crime, Drama"),
    ("Sense and Sensibility", "Drama, Romance"),
    ("Four Rooms", "Comedy"),
    ("Ace Ventura: When Nature Calls", "Comedy"),
    ("Money Train", "Action, Comedy, Crime, Drama, Thriller"),
    ("Get Shorty", "Comedy, Crime, Thriller"),
    ("Copycat", "Crime, Drama, Horror, Mystery, Thriller"),
    ("Assassins", "Action, Crime, Thriller"),
    ("Powder", "Drama, Sci-Fi"),
    ("Leaving Las Vegas", "Drama, Romance"),
    ("Othello", "Drama"),
    ("Now and Then", "Children, Drama"),
    ("Persuasion", "Drama, Romance"),
    ("The City of Lost Children", "Adventure, Drama, Fantasy, Mystery, Sci-Fi"),
    ("Shanghai Triad", "Crime, Drama"),
    ("Dangerous Minds", "Drama"),
    ("Twelve Monkeys", "Mystery, Sci-Fi, Thriller"),
    ("Wings of Courage", "Adventure, Romance, IMAX"),
    ("Babe", "Children, Drama"),
    ("Carrington", "Drama, Romance"),
    ("Dead Man Walking", "Crime, Drama"),
    ("Across the Sea of Time", "Documentary, IMAX"),
    ("It Takes Two", "Children, Comedy"),
    ("Clueless", "Comedy, Romance"),
    ("Cry, the Beloved Country", "Drama"),
    ("Richard III", "Drama, War"),
    ("Dead Presidents", "Action, Crime, Drama"),
    ("Restoration", "Drama"),
    ("Mortal Kombat", "Action, Adventure, Fantasy"),
    ("To Die For", "Comedy, Drama, Thriller"),
    ("How to Make an American Quilt", "Drama, Romance"),
    ("Seven", "Mystery, Thriller"),
    ("Pocahontas", "Animation, Children, Drama, Musical, Romance"),
    ("When Night Is Falling", "Drama, Romance"),
    ("The Usual Suspects", "Crime, Mystery, Thriller"),
    ("Guardian Angel", "Action, Drama, Thriller"),
    ("Mighty Aphrodite", "Comedy, Drama, Romance"),
    ("Lamerica", "Adventure, Drama"),
    ("The Big Green", "Children, Comedy"),
]

# Insert the movies and their descriptions into the SQL table

In [8]:
# NOTE: re-running this cell appends the same rows again.  The table schema is
# not visible from this notebook, so no de-duplication is attempted here.

# Text to embed for each movie: its title followed by its description, so
# both contribute to the vector.
texts = [name + " " + description for name, description in movies]

# Encode the whole list in one model call instead of one call per movie, and
# pin the dtype to float32: search_movies() decodes the stored bytes with
# np.frombuffer(..., dtype=np.float32), so writer and reader must agree.
embeddings = np.asarray(embedding_model.encode(texts), dtype=np.float32)

rows = [
    # The vector is stored as its raw float32 bytes.
    (name, description, vector.tobytes())
    for (name, description), vector in zip(movies, embeddings)
]

try:
    cursor.executemany(
        "INSERT INTO movie(Movies, Description, Embeddings) VALUES (%s, %s, %s)",
        rows,
    )
    conn.commit()
except mysql.connector.Error:
    # Undo the partial insert so a failed run does not leave the shared
    # connection holding an open transaction with only some of the rows.
    conn.rollback()
    raise
print("✅ Movies with embeddings inserted into the database.")

✅ Movies with embeddings inserted into the database.


# Convert the input text into embeddings

In [7]:
def get_embedding(text):
    """Encode one piece of text with the notebook's shared embedding model.

    The text is wrapped in a one-element batch for the model, and the single
    resulting vector is returned.
    """
    batch_vectors = embedding_model.encode([text])
    return batch_vectors[0]

In [16]:
def _decode_embedding(blob):
    """Decode one stored embedding blob into a float32 vector, or None if unusable."""
    if blob is None:
        return None
    try:
        vector = np.frombuffer(blob, dtype=np.float32)
    except (TypeError, ValueError):
        # TypeError: not a bytes-like object; ValueError: byte length is not
        # a whole number of float32 values.
        return None
    return vector if vector.size else None


def search_movies(query, top_k=5):
    """Return the names of the stored movies closest to a free-text query.

    Every row of the ``movie`` table is fetched, its stored embedding is
    decoded from raw float32 bytes, and a FAISS L2 index is built from the
    decoded vectors.  The query is embedded with ``get_embedding`` and the
    names of the nearest movies are returned in the order FAISS reports them.

    The index is rebuilt from the database on every call.

    Args:
        query: Text describing the kind of movie to look for.
        top_k: Maximum number of movie names to return.

    Returns:
        A list of movie names.  The list is empty when ``top_k`` is not
        positive or when no row has a usable embedding, so callers can always
        iterate over the result or test its truthiness.
    """
    if top_k <= 0:
        return []

    cursor.execute("SELECT Movies, Description, Embeddings FROM movie")
    rows = cursor.fetchall()

    # movie_data[i] is the name belonging to movie_vectors[i]; both lists are
    # only appended to together so a skipped row can never shift the pairing.
    movie_data, movie_vectors = [], []
    for movie, _description, embedding in rows:
        vector = _decode_embedding(embedding)
        # The first usable vector fixes the expected length; anything of a
        # different length cannot be stacked into the index matrix.
        if vector is None or (movie_vectors and vector.shape != movie_vectors[0].shape):
            print(f"Invalid embedding for movie: {movie}. Skipping...")
            continue
        movie_data.append(movie)
        movie_vectors.append(vector)

    if not movie_vectors:
        return []  # Nothing to search (empty table or no usable embeddings)

    # Stack into one (n_movies, dim) float32 matrix and index it by L2 distance.
    matrix = np.stack(movie_vectors)
    index = faiss.IndexFlatL2(matrix.shape[1])
    index.add(matrix)

    # FAISS expects a 2-D float32 batch, here a batch of one query.
    query_vector = np.asarray(get_embedding(query), dtype=np.float32).reshape(1, -1)

    # Never ask for more neighbours than there are movies: FAISS pads the
    # missing slots with index -1.
    k = min(top_k, len(movie_data))
    _distances, indices = index.search(query_vector, k)

    # Keep the lower bound as well: a -1 padding index would otherwise be
    # treated as "last element" by Python's negative indexing.
    return [movie_data[idx] for idx in indices[0] if 0 <= idx < len(movie_data)]


In [17]:
# Ask for a free-text description and look up the closest stored movies.
new_text = input("Enter a movie description: ")
results = search_movies(new_text)

print("🔎 Search Results:")
if not results:
    print("No matching movies found.")
else:
    # Number the hits from 1, in the order search_movies returned them.
    for rank, title in enumerate(results, start=1):
        print(f"{rank}. {title}")

🔎 Search Results:
1. Copycat
2. Seven
3. Dracula: Dead and Loving It
4. The Usual Suspects
5. Heat
