In [None]:
!pip install qdrant-client

In [None]:
!pip install sentence_transformers

In [None]:
!pip install OpenAI

In [None]:
import pandas as pd
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from openai import OpenAI



# Movie Dataset Exploration

In [None]:
# Load the movie dataset into a DataFrame. The path is relative, so the CSV
# must be present in the notebook's working directory.
df = pd.read_csv('movie_dataset.csv')

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,index,budget,id,popularity,revenue,runtime,vote_average,vote_count
count,4803.0,4803.0,4803.0,4803.0,4803.0,4801.0,4803.0,4803.0
mean,2401.0,29045040.0,57165.484281,21.492301,82260640.0,106.875859,6.092172,690.217989
std,1386.651002,40722390.0,88694.614033,31.81665,162857100.0,22.611935,1.194612,1234.585891
min,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
25%,1200.5,790000.0,9014.5,4.66807,0.0,94.0,5.6,54.0
50%,2401.0,15000000.0,14629.0,12.921594,19170000.0,103.0,6.2,235.0
75%,3601.5,40000000.0,58610.5,28.313505,92917190.0,118.0,6.8,737.0
max,4802.0,380000000.0,459488.0,875.581305,2787965000.0,338.0,10.0,13752.0


In [None]:
# Per-column non-null counts and dtypes; used to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   index                 4803 non-null   int64  
 1   budget                4803 non-null   int64  
 2   genres                4775 non-null   object 
 3   homepage              1712 non-null   object 
 4   id                    4803 non-null   int64  
 5   keywords              4391 non-null   object 
 6   original_language     4803 non-null   object 
 7   original_title        4803 non-null   object 
 8   overview              4800 non-null   object 
 9   popularity            4803 non-null   float64
 10  production_companies  4803 non-null   object 
 11  production_countries  4803 non-null   object 
 12  release_date          4802 non-null   object 
 13  revenue               4803 non-null   int64  
 14  runtime               4801 non-null   float64
 15  spoken_languages     

In [None]:
# Columns carried forward into the vector store. Any row that is missing a
# value in one of these columns is discarded.
target_columns = [
    'genres', 'keywords', 'title', 'overview', 'release_date',
    'runtime', 'vote_average', 'revenue', 'cast', 'director',
]
df = df.loc[:, target_columns].dropna()
df

Unnamed: 0,genres,keywords,title,overview,release_date,runtime,vote_average,revenue,cast,director
0,Action Adventure Fantasy Science Fiction,culture clash future space war space colony so...,Avatar,"In the 22nd century, a paraplegic Marine is di...",2009-12-10,162.0,7.2,2787965087,Sam Worthington Zoe Saldana Sigourney Weaver S...,James Cameron
1,Adventure Fantasy Action,ocean drug abuse exotic island east india trad...,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",2007-05-19,169.0,6.9,961000000,Johnny Depp Orlando Bloom Keira Knightley Stel...,Gore Verbinski
2,Action Adventure Crime,spy based on novel secret agent sequel mi6,Spectre,A cryptic message from Bond’s past sends him o...,2015-10-26,148.0,6.3,880674609,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,Sam Mendes
3,Action Crime Drama Thriller,dc comics crime fighter terrorist secret ident...,The Dark Knight Rises,Following the death of District Attorney Harve...,2012-07-16,165.0,7.6,1084939099,Christian Bale Michael Caine Gary Oldman Anne ...,Christopher Nolan
4,Action Adventure Science Fiction,based on novel mars medallion space travel pri...,John Carter,"John Carter is a war-weary, former military ca...",2012-03-07,132.0,6.1,284139100,Taylor Kitsch Lynn Collins Samantha Morton Wil...,Andrew Stanton
...,...,...,...,...,...,...,...,...,...,...
4795,Drama,gang audition police fake homeless actress,Bang,A young woman in L.A. is having a bad day: she...,1995-09-09,98.0,6.0,0,Darling Narita Peter Greene Michael Newland Er...,Ash Baron-Cohen
4796,Science Fiction Drama Thriller,distrust garage identity crisis time travel ti...,Primer,Friends/fledgling entrepreneurs invent a devic...,2004-10-08,77.0,6.9,424760,Shane Carruth David Sullivan Casey Gooden Anan...,Shane Carruth
4798,Action Crime Thriller,united states\u2013mexico barrier legs arms pa...,El Mariachi,El Mariachi just wants to play his guitar and ...,1992-09-04,81.0,6.6,2040920,Carlos Gallardo Jaime de Hoyos Peter Marquardt...,Robert Rodriguez
4800,Comedy Drama Romance TV Movie,date love at first sight narration investigati...,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",2013-10-13,120.0,7.0,0,Eric Mabius Kristin Booth Crystal Lowe Geoff G...,Scott Smith


# Store Records into Vector Database

In [None]:
# One dict per movie (column name -> value); each dict is later stored as the
# payload of a point in the vector database.
data = df.to_dict('records')

In [None]:
# Sentence-embedding model shared by indexing and querying: the same encoder
# embeds the stored movie text and the search queries.
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [None]:
# Create the vector database client.
# ":memory:" selects an in-memory instance, so the collection and its points
# have to be rebuilt in every new kernel session.
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [None]:
# Create the collection that stores the movie vectors.
# `recreate_collection` is deprecated in qdrant-client, so drop any existing
# collection explicitly and create it afresh. Re-running this cell therefore
# still starts from an empty "movies" collection.
if qdrant.collection_exists(collection_name="movies"):
    qdrant.delete_collection(collection_name="movies")

qdrant.create_collection(
    collection_name="movies",
    vectors_config=models.VectorParams(
        # The vector size must match the dimensionality produced by the encoder.
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    ),
)

  qdrant.recreate_collection(


True

In [None]:
# Index every movie as one point. `upload_records` / `models.Record` are
# deprecated in qdrant-client; `upload_points` with `models.PointStruct` is the
# replacement.
qdrant.upload_points(
    collection_name="movies",
    points=[
        models.PointStruct(
            # Position in `data` is used as the point id.
            id=idx,
            # The point vector is the element-wise sum of the embeddings of the
            # overview, genres and keywords texts.
            vector=(
                encoder.encode(doc["overview"])
                + encoder.encode(doc["genres"])
                + encoder.encode(doc["keywords"])
            ).tolist(),
            # The full movie record is kept as payload so that search hits
            # return all of its fields.
            payload=doc,
        )
        for idx, doc in enumerate(data)
    ],
)

  qdrant.upload_records(


In [None]:
# Smoke-test the index: embed a sample query, fetch the three closest movies
# and print each payload with its similarity score.
probe_vector = encoder.encode("An entertaining family friendly movie.").tolist()
hits = qdrant.search(collection_name="movies", query_vector=probe_vector, limit=3)
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'genres': 'Comedy Drama', 'keywords': 'family secrets dysfunctional family independent film death in family family conflict', 'title': 'Eulogy', 'overview': 'A black comedy that follows three generations of a family, who come together for the funeral of the patriarch - unveiling a litany of family secrets and covert relationships.', 'release_date': '2004-10-15', 'runtime': 91.0, 'vote_average': 6.4, 'revenue': 0, 'cast': 'Hank Azaria Jesse Bradford Zooey Deschanel Glenne Headly Famke Janssen', 'director': 'Michael Clancy'} score: 0.6936044276545827
{'genres': 'Comedy Family Fantasy', 'keywords': 'london england dancing parents kids relationship bank brother sister relationship', 'title': 'Mary Poppins', 'overview': 'The movie combines a diverting story, songs, color and sequences of live action blended with the movements of animated figures. Mary Poppins is a kind of Super-nanny who flies in with her umbrella in response to the request of the Banks children and proceeds to put things 

# User Query & Response Chat Generation

In [None]:
# OpenAI-compatible client pointed at a server on localhost port 8080.
# The api_key is a placeholder string, not a real credential.
client = OpenAI(api_key="sk-no-key-required", base_url="http://127.0.0.1:8080/v1")

In [None]:
# Free-text request from the user. It is embedded to retrieve matching movies
# and is also sent verbatim to the chat model as the user message.
user_request = 'Please suggest me an amazing action robot and mech movies to be watched with my family this weekend.'

In [None]:
# Search relevant movies: embed the user's request and fetch the five nearest points.
hits = qdrant.search(
    collection_name="movies",
    query_vector=encoder.encode(user_request).tolist(),
    limit=5
)

# Keep only confident matches. The similarity score is an attribute of the hit
# itself (`hit.score`), not a field of the payload. Looking it up in the payload
# always yielded the default 0, so the threshold never selected anything.
search_results = [hit.payload for hit in hits if hit.score > 0.5]

# Too few confident matches: fall back to everything the search returned.
if len(search_results) < 2:
    search_results = [hit.payload for hit in hits]

# Preview the best match; guard against an empty result instead of raising IndexError.
if search_results:
    print(search_results[0])
else:
    print("No matching movies found.")


{'genres': 'Science Fiction Action Adventure', 'keywords': 'sequel alien transformers giant robot robot', 'title': 'Transformers: Age of Extinction', 'overview': 'As humanity picks up the pieces, following the conclusion of "Transformers: Dark of the Moon," Autobots and Decepticons have all but vanished from the face of the planet. However, a group of powerful, ingenious businessman and scientists attempt to learn from past Transformer incursions and push the boundaries of technology beyond what they can control - all while an ancient, powerful Transformer menace sets Earth in his cross-hairs.', 'release_date': '2014-06-25', 'runtime': 165.0, 'vote_average': 5.8, 'revenue': 1091405097, 'cast': 'Mark Wahlberg Stanley Tucci Kelsey Grammer Nicola Peltz Jack Reynor', 'director': 'Michael Bay'}


In [None]:
# Ask the chat model for a recommendation. The conversation consists of the
# system instructions, the user's request, and the retrieved movie payloads
# (stringified) supplied as an assistant message.
system_prompt = "You are chatbot, a movie specialist. Your top priority is to help guide users into selecting amazing relevant and interesting movie and guide them with their requests."
chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_request},
    {"role": "assistant", "content": str(search_results)},
]
completion = client.chat.completions.create(model="LLaMA_CPP", messages=chat_messages)

# Only the first choice's message is used downstream.
response = completion.choices[0].message

In [None]:
# Show only the text content of the model's reply message.
print(response.content)

Here are some amazing action robot and mech movies to watch with your family this weekend:

1. Transformers: Age of Extinction (2014) - A group of powerful businessmen and scientists attempt to learn from past Transformer incursions and push the boundaries of technology beyond what they can control.
2. Zathura: A Space Adventure (2005) - After playing a space-themed board game, two young boys are shot into space and must work with an astronaut to return home.
3. Astro Boy (2009) - A young robot with incredible powers created in the image of a son goes on a journey to find acceptance and battles betrayal and a netherworld of robot gladiators.
4. Aliens in the Attic (2009) - A group of youngsters must band together to defeat aliens and save the world while keeping the whole thing a secret from their parents.
5. WALL·E (2008) - A lonely robot on a mission to clean up Earth finds a new friend when a sleek new type of robot arrives.

These movies offer a mix of action, adventure, and heart,