# Transformer + LLM + RAG: Conversational Movie Recommender

This notebook implements the Transformer-augmented conversational movie recommender system.
The model has:
- LLM (llama3.2:1B)
- RAG with INSPIRED dataset
- BERT-based Transformer with trained recommender head for movie predictions

The Transformer model was trained separately in [Transformer_Training.ipynb](./Transformer_Training.ipynb).

## Environment Initialization

In [1]:
'''
Function:
    - Check Current Working Directory
    - Move to Correct Directory
'''
import os

os.chdir("..")
print("Current Working Directory:", os.getcwd())

Current Working Directory: C:\Users\91953\Documents\GitHub\Optimized-RAG


### Requirements

In [2]:
!pip install -r requirements.txt



### RAG, LLM, Global Settings

In [15]:
# System
from pathlib import Path
import csv
import subprocess
import shutil
# Llama
from llama_index.core import VectorStoreIndex, Document, Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
# UX
from tqdm import tqdm
import emoji
# Scripts
from configs.prompts import INTERACTIVE_CHAT_PROMPT, CONTEXTUAL_EVALUATION_PROMPT
from scripts.rag_utils import load_and_index_documents
from scripts.data_loader import INSPIREDDataProcessor
from scripts.baseline import PopularityRecommender
# Evaluation Scripts
from scripts.evaluator import EvaluationPipeline
from scripts.evaluator import ContextualEvaluator
from scripts.evaluator import EvaluationVisualizer
# Transformer Specific
import torch
from scripts.transformer_recommender import TransformerRecommender

In [4]:
# Initialize the embedding model
embed_model = OllamaEmbedding(
    model_name="nomic-embed-text",
    request_timeout=300.0,
)

# Initialize the LLM with optimized settings
llm = Ollama(
    model="llama3.2:1B",
    request_timeout=300.0,
    temperature=0.1,  
    additional_kwargs={"num_gpu": 0}  # Forcing CPU usage
)

# Set global configurations
Settings.embed_model = embed_model
Settings.llm = llm

# Load Model

In [16]:
# Load trained Transformer model
print("Loading trained Transformer model...")

# Initialize data processor
data_processor = INSPIREDDataProcessor(dataset_dir="data")
data_processor.load_movie_database()

# Load model
# Temporary path:
model_path = "C:/Users/91953/Documents/GitHub/RAG-Movie-CRS/models/transformer_checkpoints/best_model.pt"

# model_path = "models/transformer_checkpoints/best_model.pt"
transformer_model = TransformerRecommender(num_movies=len(data_processor.movie_id_map))

# Load trained weights
checkpoint = torch.load(model_path, map_location='cpu')
transformer_model.load_state_dict(checkpoint['model_state_dict'])
transformer_model.eval()

print(f"Model loaded from {model_path}")
print(f"Number of movies in model: {len(data_processor.movie_id_map)}")

Loading trained Transformer model...
In load_movie_database().
Loading 17869 movies from database...
After Filtering, loaded 17869 movies
Skipped: 0 missing titles + 0 'nan' titles
Total filtered: 0
Model loaded from C:/Users/91953/Documents/GitHub/RAG-Movie-CRS/models/transformer_checkpoints/best_model.pt
Number of movies in model: 16764


## Load Index

In [None]:
# Load index
index = load_and_index_documents(split="train", max_rows=None, persist_dir="data/index", force_rebuild=False)

Force rebuild requested. Deleting existing index...


Loading data: 100%|██████████| 28710/28710 [00:00<00:00, 38994.56it/s]


Loaded 28710 turns from INSPIRED dataset
Building vector index...


Parsing nodes:   0%|          | 0/28710 [00:00<?, ?it/s]

### Interactive Conversation

In [None]:
'''
Interactive conversation with Transformer + LLM + RAG
'''
def interactive_conversation_with_history():
    
    try:
      
        # Create chat engine
        print("Creating Chat Engine...")
        
        chat_engine = index.as_chat_engine(
            similarity_top_k=5,
            chat_mode="compact",  #condense_plus_context
            system_prompt=CONTEXTUAL_EVALUATION_PROMPT #INTERACTIVE_CHAT_PROMPT
        )
        
        print("||"+"=="*10+"\t MovieCRS is Ready\t"+"=="*10+"||")
        
        print("\nYou can now ask for movie recommendations.")
        print("Type 'quit', 'exit', or 'q' to end the conversation.\n")
        
        while True:
            user_input = input(f"{emoji.emojize(':technologist:')} You:\t ").strip()
            
            if user_input.lower() in ['quit', 'exit', 'q', 'bye']:
                print(f"\n{emoji.emojize(':robot:')} MovieCRS:\t Exiting...")
                break
            
            if not user_input:
                continue
            
            try:
                
                print(f"\n{emoji.emojize(':robot:')} MovieCRS:\t ", end="", flush=True)
                streaming_response = chat_engine.stream_chat(user_input)
                
                # Stream tokens as they're generated
                for token in streaming_response.response_gen:
                    print(token, end="", flush=True)
                
            except Exception as e:
                print(f"Error: {str(e)}\n")
        
        return True
    
    except Exception as e:
        print(f"System Error: {str(e)}")
        return False

## Main

In [None]:
# Main execution
if __name__ == "__main__":
    
    print("Starting RAG Pipeline with INSPIRED Dataset...")
    
    # Start interactive conversation with history
    success = interactive_conversation_with_history()
    
    if not success:
        print("\nSystem failed to start.")

## Evaluation

### Standard Metrics

In [None]:
# Initialize evaluation pipeline
eval_pipeline = EvaluationPipeline(
    model=transformer_model,
    data_processor=data_processor,
    model_name="Transformer+LLM+RAG",
    k_values=[1, 3, 5, 10]
)

# Run evaluation on test set
results = eval_pipeline.run_evaluation(
    split="test",
    max_samples=None,
    top_k=10
)

# Save results
eval_pipeline.save_results(
    output_path="data/evaluation/trained_transformer_metrics.json",
    metadata={
        'split': 'test',
        'model_checkpoint': 'models/transformer_checkpoints/best_model.pt',
        'model_type': 'BERT-based Transformer with recommender head'
    }
)

# Display results as table
results_df = eval_pipeline.get_results_table()
display(results_df)

In [None]:
# View Saved Results
import json

# Load and display saved results
with open("data/evaluation/trained_transformer_metrics.json", 'r') as f:
    saved_results = json.load(f)

print("\nSaved Results:")
print(json.dumps(saved_results, indent=2))

### Visualization

In [None]:
# Initialize visualizer
visualizer = EvaluationVisualizer(results_file="data/evaluation")

# Load all model results (add more models as you complete them)
model_files = {
    'Baseline': 'baseline_metrics.json',
    'Transformer': 'trained_transformer_metrics.json', 
    # 'RGCN': 'rgcn_metrics.json', 
    # 'NCF': 'ncf_metrics.json',  
}

visualizer.load_results(model_files)

In [None]:
# Plot HIT metric comparison
fig = visualizer.plot_grouped_bars(metric_type='HIT', k_values=[1, 3, 5, 10])
fig.show()

### Contextual Metrics

In [None]:
# Initialize contextual evaluator
contextual_eval = ContextualEvaluator(queries_dir="data/evaluation")

# Load queries
contextual_eval.load_queries()

In [None]:
# Create chat engine with contextual prompt
from llama_index.core.llms import ChatMessage, MessageRole

chat_engine_contextual = index.as_chat_engine(
    similarity_top_k=5,
    chat_mode="context",
    system_prompt=CONTEXTUAL_EVALUATION_PROMPT
)

print("Chat engine ready for contextual evaluation")

In [None]:
# Run all contextual queries
responses = contextual_eval.run_queries(chat_engine_contextual)

# Display responses
contextual_eval.display_responses()

In [None]:
try:
    # Collect ratings from user
    model_name = "Transformer+LLM+RAG"
    ratings = contextual_eval.collect_ratings(model_name)
    
    if ratings:  
        evaluator_id = contextual_eval.save_ratings(model_name, ratings)
    else:
        print("No ratings collected.")
        
except KeyboardInterrupt:  
    print("\n\nRating interrupted. Partial ratings not saved.")

In [None]:
# View summary of all evaluations
summary_df = contextual_eval.get_summary()
if summary_df is not None:
    display(summary_df)