# Mini RAG System for Movie Plots

In [2]:
import sys
import os

# Make the parent directory of the notebook's working directory importable so
# that `src.*` imports in later cells resolve. The membership check keeps the
# cell idempotent: re-running it does not pile up duplicate sys.path entries.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
import json
import openai
from dotenv import load_dotenv
from IPython.display import display, Markdown

In [4]:
from src.pipeline import load_and_preprocess_data, chunk_documents, create_vector_store
from src.main import query_rag_system

[32m2025-09-25 11:28:08[0m | [1mINFO    [0m | [36mutils.logger.logging_manager[0m:[36m__init__[0m:[36m66[0m - [1mGeneral logger initialized[0m
[32m2025-09-25 11:28:08[0m | [1mINFO    [0m | [36mutils.logger.logging_manager[0m:[36m__init__[0m:[36m67[0m - [1mLogging to file: logs/general\general_20250925_112808_11272.log[0m


In [5]:
# Pull the variables defined in the project-level .env file into the process
# environment.
load_dotenv(dotenv_path='../.env')

# Build the OpenAI client, failing fast when the key is missing or empty.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key:
    client = openai.OpenAI(api_key=api_key)
else:
    raise ValueError("OPENAI_API_KEY not found in .env file")

print("Setup complete and functions imported.")

Setup complete and functions imported.


In [6]:
# Path to the dataset. The leading ".." climbs one directory from the
# notebook's working directory (the same parent directory that the first cell
# adds to sys.path), so this is relative to the notebook, not the project root.
DATA_PATH = "../data/wiki_movie_plots_deduped.csv"

# Run the pipeline functions.
# The three stages are strictly sequential: each call consumes the previous
# call's result, so the statements must stay in this order.
documents = load_and_preprocess_data(DATA_PATH)
chunks = chunk_documents(documents)
# `collection` and `embedding_model` are kept as notebook globals because the
# query cell further down passes them to query_rag_system().
collection, embedding_model = create_vector_store(chunks)

print("\n--- Knowledge Base is Ready ---")

[32m2025-09-25 11:29:17[0m | [1mINFO    [0m | [36msrc.pipeline[0m:[36mload_and_preprocess_data[0m:[36m14[0m - [1mLoading and preprocessing 300 rows from ../data/wiki_movie_plots_deduped.csv...[0m
[32m2025-09-25 11:29:18[0m | [1mINFO    [0m | [36msrc.pipeline[0m:[36mload_and_preprocess_data[0m:[36m22[0m - [1mLoaded 300 documents.[0m
[32m2025-09-25 11:29:18[0m | [1mINFO    [0m | [36msrc.pipeline[0m:[36mchunk_documents[0m:[36m30[0m - [1mChunking 300 documents...[0m
[32m2025-09-25 11:29:18[0m | [1mINFO    [0m | [36msrc.pipeline[0m:[36mchunk_documents[0m:[36m36[0m - [1mCreated 1071 chunks.[0m
[32m2025-09-25 11:29:18[0m | [1mINFO    [0m | [36msrc.pipeline[0m:[36mcreate_vector_store[0m:[36m44[0m - [1mInitializing embedding model and vector store...[0m
[32m2025-09-25 11:29:47[0m | [1mINFO    [0m | [36msrc.pipeline[0m:[36mcreate_vector_store[0m:[36m54[0m - [1mVector store created successfully.[0m



--- Knowledge Base is Ready ---


In [None]:
import ipywidgets as widgets
from IPython.display import display, Markdown

# 1. Create the UI components

# Pane that receives whatever the click handler renders.
output_area = widgets.Output()

# Button that triggers a query run.
submit_button = widgets.Button(
    tooltip='Click to run the query',
    button_style='success',
    description='Ask RAG System',
)

# Free-text query box, pre-filled with a sample question.
text_input = widgets.Text(
    layout=widgets.Layout(width='90%'),
    description='Query:',
    placeholder='Type your question here...',
    value='Which movie is about an AI computer that becomes hostile?',
)

# 2. Define the function that runs on button click
def on_button_click(b):
    """Run the query from the text box through the RAG system and render it.

    Intended as the click handler for ``submit_button``. It reads
    ``text_input``, ``collection`` and ``embedding_model`` from the notebook
    namespace and writes its result into ``output_area``.

    Args:
        b: The object passed by the click event; unused.

    The result of ``query_rag_system`` is indexed with the keys ``'answer'``,
    ``'reasoning'`` and ``'contexts'``; ``'contexts'`` is iterated, so any
    number of contexts (including none) is rendered.
    """
    with output_area:
        # Clear previous output
        output_area.clear_output()

        # Get the query from the text box; a blank query is not worth a
        # round-trip through retrieval and the LLM.
        user_query = text_input.value.strip()
        if not user_query:
            display(Markdown("**Please enter a question before submitting.**"))
            return

        # Run the RAG system
        rag_output = query_rag_system(user_query, collection, embedding_model)

        # Number however many contexts came back instead of assuming exactly
        # three, which raised IndexError when fewer were returned.
        context_lines = [
            f"{position}. **Context {position}:** {context}"
            for position, context in enumerate(rag_output['contexts'], start=1)
        ]
        if not context_lines:
            context_lines = ["_No contexts were returned._"]

        # Assemble the Markdown without leading indentation: Markdown treats
        # lines indented by four or more spaces as a code block, which would
        # show the headings and list as raw text.
        md_output = "\n".join([
            "### Query",
            f"> {user_query}",
            "",
            "### Answer",
            f"{rag_output['answer']}",
            "",
            "### Reasoning",
            f"{rag_output['reasoning']}",
            "",
            "### Contexts Used",
            *context_lines,
        ])
        display(Markdown(md_output))

# 3. Link the button click event to the function
# Registration happens before the widgets are displayed, so the button is
# already wired up by the time it is shown.
submit_button.on_click(on_button_click)

# 4. Display the UI components
# The widgets are passed to display() in the order query box, button, output
# area; the handler writes its results into `output_area`.
print("Enter your question about movie plots below and click the button.")
display(text_input, submit_button, output_area)