In [None]:
# Install the Pinecone client, sentence-transformers and datasets into the kernel's environment.
# NOTE(review): no versions are pinned here; consider pinning them for reproducibility.
%pip install pinecone sentence-transformers datasets

In [67]:
from datasets import load_dataset

# Load the "MongoDB/embedded_movies" dataset; the recorded output below shows a
# single `train` split with 1500 rows.
dataset = load_dataset("MongoDB/embedded_movies")

In [68]:
# Show the available splits, their features and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['plot', 'runtime', 'genres', 'fullplot', 'directors', 'writers', 'countries', 'poster', 'languages', 'cast', 'title', 'num_mflix_comments', 'rated', 'imdb', 'awards', 'type', 'metacritic', 'plot_embedding'],
        num_rows: 1500
    })
})

In [69]:
import pandas as pd

In [70]:
# Convert the `train` split into a pandas DataFrame for cleaning and export.
dataset_df = pd.DataFrame(dataset['train'])

In [71]:
# Preview the first rows to check the column contents.
dataset_df.head()

Unnamed: 0,plot,runtime,genres,fullplot,directors,writers,countries,poster,languages,cast,title,num_mflix_comments,rated,imdb,awards,type,metacritic,plot_embedding
0,Young Pauline is left a lot of money when her ...,199.0,[Action],Young Pauline is left a lot of money when her ...,"[Louis J. Gasnier, Donald MacKenzie]","[Charles W. Goddard (screenplay), Basil Dickey...",[USA],https://m.media-amazon.com/images/M/MV5BMzgxOD...,[English],"[Pearl White, Crane Wilbur, Paul Panzer, Edwar...",The Perils of Pauline,0,,"{'id': 4465, 'rating': 7.6, 'votes': 744}","{'nominations': 0, 'text': '1 win.', 'wins': 1}",movie,,"[0.0007293965299999999, -0.026834568000000003,..."
1,A penniless young man tries to save an heiress...,22.0,"[Comedy, Short, Action]",As a penniless man worries about how he will m...,"[Alfred J. Goulding, Hal Roach]",[H.M. Walker (titles)],[USA],https://m.media-amazon.com/images/M/MV5BNzE1OW...,[English],"[Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...",From Hand to Mouth,0,TV-G,"{'id': 10146, 'rating': 7.0, 'votes': 639}","{'nominations': 1, 'text': '1 nomination.', 'w...",movie,,"[-0.022837115, -0.022941574000000003, 0.014937..."
2,"Michael ""Beau"" Geste leaves England in disgrac...",101.0,"[Action, Adventure, Drama]","Michael ""Beau"" Geste leaves England in disgrac...",[Herbert Brenon],"[Herbert Brenon (adaptation), John Russell (ad...",[USA],,[English],"[Ronald Colman, Neil Hamilton, Ralph Forbes, A...",Beau Geste,0,,"{'id': 16634, 'rating': 6.9, 'votes': 222}","{'nominations': 0, 'text': '1 win.', 'wins': 1}",movie,,"[0.00023330492999999998, -0.028511643000000003..."
3,"Seeking revenge, an athletic young man joins t...",88.0,"[Adventure, Action]",A nobleman vows to avenge the death of his fat...,[Albert Parker],"[Douglas Fairbanks (story), Jack Cunningham (a...",[USA],https://m.media-amazon.com/images/M/MV5BMzU0ND...,,"[Billie Dove, Tempe Pigott, Donald Crisp, Sam ...",The Black Pirate,1,,"{'id': 16654, 'rating': 7.2, 'votes': 1146}","{'nominations': 0, 'text': '1 win.', 'wins': 1}",movie,,"[-0.005927917, -0.033394486, 0.0015323418, -0...."
4,An irresponsible young millionaire changes his...,58.0,"[Action, Comedy, Romance]","The Uptown Boy, J. Harold Manners (Lloyd) is a...",[Sam Taylor],"[Ted Wilde (story), John Grey (story), Clyde B...",[USA],https://m.media-amazon.com/images/M/MV5BMTcxMT...,[English],"[Harold Lloyd, Jobyna Ralston, Noah Young, Jim...",For Heaven's Sake,0,PASSED,"{'id': 16895, 'rating': 7.6, 'votes': 918}","{'nominations': 1, 'text': '1 nomination.', 'w...",movie,,"[-0.0059373598, -0.026604708, -0.0070914757000..."


In [72]:
# List the column names available in the frame.
dataset_df.columns

Index(['plot', 'runtime', 'genres', 'fullplot', 'directors', 'writers',
       'countries', 'poster', 'languages', 'cast', 'title',
       'num_mflix_comments', 'rated', 'imdb', 'awards', 'type', 'metacritic',
       'plot_embedding'],
      dtype='object')

In [73]:
# Count rows whose "fullplot" is missing (48 in the recorded run); these cannot be embedded.
dataset_df["fullplot"].isnull().sum()

np.int64(48)

In [76]:
# Drop rows with no "fullplot": the embedding step below calls .strip() on each
# value and would fail on missing entries.
dataset_df = dataset_df.dropna(subset=["fullplot"])

In [77]:
# Discard the embeddings that ship with the dataset; they are recomputed later
# in the notebook with a local sentence-transformers model.
# errors='ignore' keeps this cell re-runnable: without it, running the cell a
# second time in a row raises KeyError because the column is already gone.
dataset_df = dataset_df.drop(columns=['plot_embedding'], errors='ignore')

In [78]:
from sentence_transformers import SentenceTransformer

In [None]:
# Load the "all-MiniLM-L6-v2" sentence-transformers model; get_embedding uses it
# for both the movie plots and the user query.
# NOTE(review): the Pinecone index queried later must have the same dimension
# as this model's output — confirm.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

In [61]:
def get_embedding(text):
  """Embed `text` with the notebook-level `embedding_model`.

  Args:
    text: The string to embed.

  Returns:
    The result of `embedding_model.encode(text)` converted with `.tolist()`,
    or an empty list when `text` is not a string or contains only whitespace.
  """
  # Non-string values (None, NaN coming from pandas, ...) have no .strip();
  # treat them like blank input instead of raising AttributeError.
  if not isinstance(text, str) or not text.strip():
    print("Attempted to get embedding for empty string")
    return []
  return embedding_model.encode(text).tolist()

In [79]:
# Embed every remaining full plot (one get_embedding call per row) and store the
# resulting lists in a fresh "plot_embedding" column.
dataset_df["plot_embedding"] = dataset_df["fullplot"].apply(get_embedding)

In [80]:
from google.colab import userdata
# Read the MongoDB connection string from Colab userdata instead of hard-coding it.
mongo_db_uri = userdata.get('mongo_db_uri')

In [None]:
# pymongo provides the MongoDB client used in the next cell.
# NOTE(review): installed mid-notebook and unpinned; consider moving it to the
# first install cell with a pinned version.
%pip install pymongo

In [81]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Open a MongoDB client with server API version '1', then ping the deployment
# to verify the connection. A failure is only printed, so later cells still run.
uri = mongo_db_uri
client = MongoClient(uri, server_api=ServerApi('1'))

try:
    client.admin.command('ping')
except Exception as e:
    print(e)
else:
    print("Pinged your deployment. You successfully connected to MongoDB!")

Pinged your deployment. You successfully connected to MongoDB!


In [82]:
# Select the "movie_db" database on the connected client.
db = client["movie_db"]

In [83]:
# Collection that stores the movie records and is read back during retrieval.
collection = db["collection02"]

In [84]:
# One dict per DataFrame row, in the shape insert_many expects.
# NOTE(review): despite the singular name, `document` holds all the records.
document = dataset_df.to_dict("records")

In [None]:
# Write all movie records (including their embeddings) to MongoDB.
# NOTE(review): this cell is not idempotent — re-running the notebook writes the
# same movies again; consider clearing the collection first or upserting on a
# stable key.
collection.insert_many(document)

In [25]:
from google.colab import userdata
# Read the Pinecone API key from Colab userdata instead of hard-coding it.
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')

In [86]:
from pinecone import Pinecone

# Pinecone client and a handle to the "genai-test-index" index.
# NOTE(review): no visible cell upserts vectors into this index, yet the
# retrieval code below treats every match id as a MongoDB ObjectId — presumably
# the index was populated elsewhere using the documents' `_id` values as vector
# ids; confirm.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("genai-test-index")

In [87]:
def get_result(user_prompt, top_k):
  """Query the Pinecone index for the matches closest to `user_prompt`.

  Args:
    user_prompt: Query text; it is embedded with `get_embedding`.
    top_k: Number of matches to request from the index.

  Returns:
    The response object returned by `index.query`.

  Raises:
    ValueError: If `user_prompt` produces no embedding (blank prompt).
  """
  query_vector = get_embedding(user_prompt)
  # get_embedding returns [] for blank input; an empty vector is not a usable
  # query, so fail here with an actionable message instead of sending it.
  if not query_vector:
    raise ValueError("user_prompt must contain text to embed")
  return index.query(vector=query_vector, top_k=top_k)

In [88]:
# Example question; used for vector retrieval and again when building the LLM prompt.
query = "Which one is the best movie based on the plot for children?"

In [91]:
# Retrieve the 5 closest matches for the question from the vector index.
response = get_result(query, 5)

In [None]:
# Inspect the raw query response (match ids are used for the MongoDB lookup below).
response

In [94]:
from bson.objectid import ObjectId

In [97]:
# Resolve each Pinecone match to its MongoDB document; the match id is parsed
# as the document's ObjectId.
# find_one returns None when no document matches, so those ids are reported and
# skipped instead of leaving None entries that break later cells.
mylist = []
for match in response["matches"]:
  movie_doc = collection.find_one({"_id": ObjectId(match["id"])})
  if movie_doc is None:
    print(f"No MongoDB document found for match id {match['id']}")
    continue
  mylist.append(movie_doc)

In [None]:
# Inspect the documents fetched from MongoDB for the retrieved matches.
mylist

In [100]:
# Build the context block for the LLM: one "Title: ..., Plot: ..." line per
# retrieved movie. None entries (a lookup that found nothing) are skipped rather
# than raising TypeError, and the lines are joined once instead of growing the
# string inside a loop.
retrieved_info = "".join(
    f"Title: {movie['title']}, Plot: {movie['fullplot']}\n"
    for movie in mylist
    if movie is not None
)

In [None]:
# LangChain integration for Google's Gemini models, used for the answer step.
# NOTE(review): `-U` with no version pin installs whatever is latest at run
# time; pin a version for reproducible runs.
%pip install -U langchain-google-genai

In [105]:
from google.colab import userdata
import os

# Read the Gemini key from Colab userdata and expose it through the environment;
# the chat model below is constructed without an explicit key.
# NOTE(review): confirm the installed langchain-google-genai reads
# GEMINI_API_KEY — GOOGLE_API_KEY is the variable commonly documented.
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
os.environ['GEMINI_API_KEY'] = GEMINI_API_KEY

In [106]:
# Combine the user question with the retrieved titles and plots into the human message.
# NOTE(review): retrieved_info is built with a trailing newline per movie, so
# the final "." ends up alone on the last line of the prompt.
prompt = f"Query: {query}\nContinue to answer the query by using the plot only:\n{retrieved_info}."

In [108]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model via LangChain. No API key is passed here, so the client has
# to pick it up from the environment. max_tokens and timeout are passed as None
# (no explicit limit given); failed calls are retried up to max_retries times.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=1.0,
    max_tokens=None,
    timeout=None,
    max_retries=2
)

# (role, content) pairs: the system message sets the assistant's task, the
# human message carries the query plus the retrieved plots.
messages = [
    (
        "system",
        "You are a helpful assistant that analyzes the title and plot of movies provided and answer the asked queries.",
    ),
    ("human", prompt),
]

# Single synchronous call; the reply text is read from ai_msg.content below.
ai_msg = model.invoke(messages)

In [111]:
# Print the model's answer text.
print(ai_msg.content)

Based on the plot descriptions:

*   **Fortress:** The plot involves children responding with "cruelty and fury extending that of their oppressors" after being kidnapped, which sounds too dark and intense for children.
*   **Amar Akbar Anthony:** While it has songs and a happy ending, the plot involves abandonment, a mother going blind, mob bosses, and "furious action sequences," which might be too heavy or complex for young children.
*   **The Karate Kid, Part II:** This is a continuation of a series and while it involves rivalry and making enemies, it's generally suitable for children, especially fans of the first movie.
*   **Space Raiders:** This features a 10-year-old protagonist, space adventure, and a "sensitive tale" despite the accidental kidnapping. It sounds like a fun, kid-friendly adventure.
*   **Cloak & Dagger:** This plot focuses on an 11-year-old who goes on a spy adventure with an imaginary friend, learning to rely on himself, and ultimately finding his real hero in h