In [None]:
'''
import zipfile

# Replace 'your_file.zip' with the name of your uploaded zip file
zip_file_path = 'finetuned_distilbert_qa.zip'

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall('ex_finetuned_distilbert_qa')  # 'extracted_folder' is where files will be extracted
'''

In [None]:
'''
import zipfile

# Replace 'your_file.zip' with the name of your uploaded zip file
zip_file_path = 'fine_tuned_distilbert_model_final-20241213T012225Z-001.zip'

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall('ex_finetuned_distilbert_genre')  # 'extracted_folder' is where files will be extracted
'''

In [1]:
import zipfile

# Replace 'your_file.zip' with the name of your uploaded zip file
zip_file_path = '/content/Stat_Software_Project_demo.zip'

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall('ex_Stat_Software_Project_demo')  # 'extracted_folder' is where files will be extracted


In [10]:
from sentence_transformers import SentenceTransformer
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer, DistilBertForQuestionAnswering

# Load pre-trained models from file paths
genre_model_path = "/content/ex_Stat_Software_Project_demo/ex_finetuned_distilbert_genre/fine_tuned_distilbert_model_final"  # Update with your file path
qa_model_path = "/content/ex_Stat_Software_Project_demo/ex_finetuned_distilbert_qa"  # Update with your file path

genre_model = DistilBertForSequenceClassification.from_pretrained(genre_model_path)
genre_tokenizer = DistilBertTokenizer.from_pretrained(genre_model_path)

qa_model = DistilBertForQuestionAnswering.from_pretrained(qa_model_path)
qa_tokenizer = DistilBertTokenizer.from_pretrained(qa_model_path)

# Load semantic model for matching
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# Assuming FAISS index and titles mapping are already loaded
import faiss
index = faiss.read_index("/content/ex_Stat_Software_Project_demo/movie_plots_index.faiss")
#res = faiss.StandardGpuResources()
#gpu_index = faiss.index_cpu_to_gpu(res, 0, index)

# Load movie titles
import pandas as pd
df = pd.read_csv("/content/ex_Stat_Software_Project_demo/titles_mapping.csv")
titles = df["title"].tolist()

def classify_genre(user_query):
    # Tokenize and classify genre using the fine-tuned DistilBERT model
    inputs = genre_tokenizer(user_query, return_tensors="pt", truncation=True, padding=True)
    outputs = genre_model(**inputs)
    predicted_class = torch.argmax(outputs.logits, dim=1).item()

    # Map predicted class to genre names (update as needed)
    genres = ['Action','Adventure','Animated','Comedy','Cult','Drama','Family','Historical/Documentary','Horror','International','Musical','Romance','Science Fiction','Short Film','Thriller']
    predicted_genre = genres[predicted_class]
    return predicted_genre

def find_top_k_matches(user_query, k=3):
    # Generate embedding for the user query
    query_embedding = semantic_model.encode([user_query], convert_to_numpy=True, normalize_embeddings=True)

    # Search in the FAISS index for the top-k matches
    distances, indices = index.search(query_embedding, k=k)

    # Retrieve titles and scores for the top-k matches
    results = []
    for i in range(k):
        match_index = indices[0][i]
        match_score = distances[0][i]
        match_title = titles[match_index]
        results.append((match_title, match_score))

    return results
'''
def answer_qa(question, context):
    # Tokenize the question and context for the QA model
    inputs = qa_tokenizer(question, context, return_tensors="pt")

    # Get start and end logits
    outputs = qa_model(**inputs)
    start_scores, end_scores = outputs.start_logits, outputs.end_logits

    # Get the most likely answer
    start_index = torch.argmax(start_scores)
    end_index = torch.argmax(end_scores)

    # Convert token indices to string
    answer = qa_tokenizer.convert_tokens_to_string(qa_tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][start_index:end_index+1]))
    return answer.strip()
'''

def answer_qa(question, context):
    # Tokenize the question and context for the QA model
    inputs = qa_tokenizer(question, context, return_tensors="pt")

    # Get start and end logits
    outputs = qa_model(**inputs)
    start_logits, end_logits = outputs.start_logits, outputs.end_logits
    start_index = torch.argmax(start_logits)
    end_index = torch.argmax(end_logits)

    # Decode the answer
    answer_tokens = inputs['input_ids'][0][start_index:end_index+1]
    prediction = qa_tokenizer.decode(answer_tokens)
    #predictions.append(prediction)

    return prediction.strip()
    #return answer.strip()

def process_user_query(user_query):
    # Step 1: Classify the genre
    genre = classify_genre(user_query)
    print(f"Classified genre: {genre}")

    # Step 2: Concatenate genre with the query and find top-k movie matches
    query_with_genre = f"{genre}: {user_query}"
    top_k_results = find_top_k_matches(query_with_genre, k=3)

    # Print top-k movie suggestions
    print("Top movie suggestions based on your query:")
    for rank, (title, score) in enumerate(top_k_results, start=1):
        print(f"{rank}. {title} (score: {score:.4f})")

    # Step 3: Use the QA model to answer follow-up questions about one of the top movie suggestions
    movie_title = top_k_results[0][0]  # Taking the top match
    df2 = pd.read_csv("/content/ex_Stat_Software_Project_demo/context.csv")
    movie_plot = df2[df['title'] == movie_title]['context'].values[0]  # Assuming PlotSummary is in the dataset

    # Example of answering a follow-up question
    while True:
      print(f"Ask a question about {movie_title}, or type 'exit' to stop:")
      follow_up_question = input(f"Ask a question about {movie_title},: ")
      if follow_up_question.lower() == 'exit':
            break
      answer = answer_qa(follow_up_question, movie_plot)
      print(f"Answer: {answer}")

    #follow_up_question = input(f"Ask a question about {movie_title}: ")


# Example usage:
print("Welcome to the Movie Recommendation System!, What's on your mind today?")
user_query = input()
process_user_query(user_query)


Welcome to the Movie Recommendation System!, What's on your mind today?
A hacker discovers a simulated reality controlled by machines.
Classified genre: Science Fiction
Top movie suggestions based on your query:
1. darwin (score: 0.4974)
2. gamer (score: 0.4816)
3. weird science (score: 0.4614)


  movie_plot = df2[df['title'] == movie_title]['context'].values[0]  # Assuming PlotSummary is in the dataset


Ask a question about darwin, or type 'exit' to stop:
Ask a question about darwin,: What is the plot of darwin?
Answer: vicky ( jeevan ) is the eldest son of a railway officer , is a perpetual loser in whatever he does , and is looked down by other family members . one day he meets shivani ( kamna jethmalani ) , daughter of a rich textile tycoon , who has brought all the luck to her father , with her midas touch ! soon the ‘ unlucky ’ vicky is drawn towards the ' lucky ' shivani and they fall madly in love ! due to circumstances , they are forced to elope , with shivani ’ s cop brother in hot pursuit .
Ask a question about darwin, or type 'exit' to stop:
Ask a question about darwin,: Who are the main actors in darwin?
Answer: jeevan , kamna jethmalani
Ask a question about darwin, or type 'exit' to stop:
Ask a question about darwin,: exit


In [3]:
pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [None]:
import zipfile
import os

# Set the directory where your files are located
folder_path = '/content'  # Change this to your folder path

# Specify the output zip file
zip_filename = '/Stat_Software_Project_demo.zip'  # Change this to your desired output zip file path

# Create a zip file
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), folder_path))

print(f"Files have been successfully zipped into {zip_filename}")


Files have been successfully zipped into /Stat_Software_Project_demo.zip
