# Dream Interpreter — Final Version

## Import and install packages

In [1]:
!pip install rank_bm25 gradio gtts langchain_community langchain_groq pymupdf faiss-cpu langchain langchain-community sentence-transformers fal-client requests -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m90.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m438.9/438.9 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# NOTE(review): this repeats part of the install cell above with -U, so the two
# cells can resolve to different package versions; consider merging them and
# pinning versions.
!pip install -U langchain langchain-community sentence-transformers fal-client requests -q

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): no code visible in this notebook reads from /content/drive (the
# data directory used later is "/content/Dream data") — confirm the mount is
# still needed.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import os
import torch
from google.colab import userdata
from langchain.document_loaders import PyMuPDFLoader, CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_groq import ChatGroq
import gradio as gr
from typing import List, Tuple
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.prompts.prompt import PromptTemplate
import requests
import asyncio
import nest_asyncio
from IPython.display import Video, display
import fal_client

## Define the necessary functions

In [3]:
def _combine_documents(docs, document_prompt, document_separator="\n\n"):
    doc_strings = [
        f"Document {i}: \n'''\n{format_document(doc, document_prompt)}\n'''"
        for i, doc in enumerate(docs, 1)
    ]
    return document_separator.join(doc_strings)

def _format_chat_history(chat_history: List[Tuple]) -> str:
    turn = 1
    buffer = []
    for dialogue in chat_history:
        buffer.append(("Human: " if turn else "Assistant: ") + dialogue.content)
        turn ^= 1
    return "\n".join(buffer) + "\n"

def make_pairs(lst):
    """Group consecutive items of an even-length list into 2-tuples.

    ``[a, b, c, d]`` becomes ``[(a, b), (c, d)]``. The list is expected to
    have an even number of items.
    """
    pairs = []
    for start in range(0, len(lst), 2):
        first, second = lst[start], lst[start + 1]
        pairs.append((first, second))
    return pairs

def make_html_source(i, doc):
    """Render one retrieved document as an HTML card.

    Args:
        i: Number shown in the card heading (``Doc {i}``).
        doc: Object exposing ``page_content`` (the text to show) and a
            ``metadata`` mapping that contains ``"page_number"``.

    Returns:
        The HTML markup of the card as a string.

    Raises:
        KeyError: If ``doc.metadata`` has no ``"page_number"`` entry.
    """
    # The original called ``BeautifulSoup``, which the notebook never imports
    # (NameError on first use). The content is document text headed for an
    # HTML page, so it is escaped with the standard library instead; any markup
    # inside the text is therefore shown literally rather than interpreted.
    from html import escape

    safe_content = escape(str(doc.page_content), quote=False)
    return f"""
<div class="card">
  <div class="card-content">
      <h3>Doc {i}</h3>
      <p>{safe_content}</p>
  </div>
  <div class="card-footer">
    <span>page: {doc.metadata['page_number']}</span>
  </div>
</div>
"""

In [4]:
async def chat(query: str, history: list = []):
    """Stream one chat turn through ``final_chain`` and yield UI updates.

    For every streamed log patch this async generator yields the 4-tuple
    ``("", gradio_format, history, source_string)``: an empty string, the
    conversation as ``(user, assistant)`` pairs, the ``history`` list that was
    passed in, and the retrieved sources rendered as HTML cards. When the
    stream ends, the turn is saved to ``memory`` and one last tuple is yielded
    whose third element is the history read back from ``memory``.

    Args:
        query: The new user question.
        history: Earlier messages; each item must expose ``.content``.

    NOTE(review): ``memory`` and ``final_chain`` are read as globals but are
    not defined anywhere in the visible notebook — confirm where they come from.
    NOTE(review): ``history`` has a mutable default; it is not mutated here,
    but the same list object is yielded to the caller.
    """
    source_string = ""
    # Previous messages are paired as (user, assistant); the new query starts
    # with an empty answer that is filled in token by token below.
    gradio_format = make_pairs([a.content for a in history]) + [(query, "")]

    # Reset memory
    memory.clear()
    for message in history:
        memory.chat_memory.add_message(message)

    # `inputs` is reused after the stream when the turn is saved to memory.
    inputs = {"question": query}
    result = final_chain.astream_log({"question": query})

    # Log paths compared against each streamed operation's "path".
    reformulated_question_path_id = "/logs/ChatGroq/streamed_output_str/-"
    retriever_path_id = "/logs/Retriever/final_output"
    final_answer_path_id = "/logs/ChatGroq:2/streamed_output_str/-"

    async for op in result:
        # Only the first operation of each patch is inspected.
        op = op.ops[0]
        if op["path"] == reformulated_question_path_id:  # reformulated question
            # The token is read but not used for anything further.
            new_token = op["value"]  # str

        elif op["path"] == retriever_path_id:  # documents
            sources = op["value"]["documents"]  # List[Document]
            source_string = "\n\n".join(
                [make_html_source(i, doc) for i, doc in enumerate(sources, 1)]
            )

        elif op["path"] == final_answer_path_id:  # final answer
            new_token = op["value"]  # str
            # Append the token to the answer half of the last (query, answer) pair.
            answer_yet = gradio_format[-1][1]
            gradio_format[-1] = (query, answer_yet + new_token)

        # One yield per patch, whether or not it matched a path above.
        yield "", gradio_format, history, source_string

    memory.save_context(inputs, {"answer": gradio_format[-1][1]})
    yield "", gradio_format, memory.load_memory_variables({})["history"], source_string


In [6]:
def interpret_dream(dream_text: str):
    """Interpret a dream using the retrieved book passages as context.

    Relies on the globals ``retriever``, ``interpretation_chain`` and
    ``summary_chain`` set up by ``initialize_dream_interpreter``.

    Args:
        dream_text: The dreamer's description of the dream.

    Returns:
        A ``(interpretation, video_prompt)`` tuple: the interpretation
        produced by ``interpretation_chain`` and the short scene description
        produced from it by ``summary_chain``.
    """
    # ``get_relevant_documents`` is deprecated and emits a warning on every
    # call; ``invoke`` is the supported entry point for retrievers.
    relevant_docs = retriever.invoke(dream_text)

    # Number the passages from 1 so the context reads "Document 1", "Document 2", ...
    context = "\n\n".join(
        f"Document {i}: {doc.page_content}"
        for i, doc in enumerate(relevant_docs, start=1)
    )

    # Generate the interpretation from the dream plus the retrieved context.
    response = interpretation_chain.invoke({
        "context": context,
        "dream": dream_text
    })

    # Condense the interpretation into a prompt for the video model.
    video_prompt = summary_chain.invoke({"interpretation": response})

    return response, video_prompt

def initialize_dream_interpreter(path_data: str = "/content/Dream data"):
    """Build the retrieval pipeline and the LLM chains used by ``interpret_dream``.

    Loads every PDF and CSV file found directly inside ``path_data``, splits
    them into chunks, indexes the chunks with BM25 and a FAISS dense index,
    combines both in an ensemble retriever, reranks with a cross-encoder, and
    creates the interpretation and summary chains.

    Args:
        path_data: Directory that holds the dream-interpretation documents.

    Side effects:
        Sets ``os.environ["GROQ_API_KEY"]`` and assigns the module globals
        ``retriever``, ``interpretation_chain``, ``summary_chain`` and ``llm``.

    Raises:
        ValueError: If no documents could be loaded from ``path_data``.
    """
    global retriever, interpretation_chain, summary_chain, llm

    # NOTE(review): the secret name ends in a lowercase "y". It is kept as-is
    # because it has to match the name stored in Colab secrets — confirm.
    os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEy")

    # Load and split the source documents.
    documents = []

    if os.path.isdir(path_data):
        # length_function counts whitespace-separated words, so chunk_size and
        # chunk_overlap are measured in words rather than characters.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=256,
            chunk_overlap=50,
            length_function=lambda x: len(x.split())
        )

        # Sorted so the chunk order (and therefore the indexes built from it)
        # does not depend on the filesystem's listing order.
        for file in sorted(os.listdir(path_data)):
            file_path = os.path.join(path_data, file)
            lowered_name = file.lower()

            if lowered_name.endswith(".pdf"):
                loader = PyMuPDFLoader(file_path)
            elif lowered_name.endswith(".csv"):
                loader = CSVLoader(file_path)
            else:
                continue

            docs = loader.load_and_split(text_splitter)

            # Add page number metadata if missing
            for doc in docs:
                doc.metadata.setdefault("page_number", "N/A")

            documents.extend(docs)

    if not documents:
        raise ValueError("No documents found. Please add dream interpretation documents to the specified path.")

    # Sparse (keyword) retriever.
    bm25_retriever = BM25Retriever.from_documents(documents)
    bm25_retriever.k = 5

    # Dense retriever: embeddings + FAISS index.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        encode_kwargs={"normalize_embeddings": True},
        model_kwargs={"device": device},
        query_instruction="Represent this sentence for searching relevant passages: "
    )

    dense_vectorstore = FAISS.from_documents(documents, embedding=embeddings)
    dense_retriever = dense_vectorstore.as_retriever(search_kwargs={"k": 5})

    # Hybrid retriever: equal weight for sparse and dense results.
    hybrid = EnsembleRetriever(
        retrievers=[bm25_retriever, dense_retriever],
        weights=[0.5, 0.5]
    )

    # Cross-encoder reranking keeps the five best hybrid results.
    cross_encoder_model = HuggingFaceCrossEncoder(
        model_name="BAAI/bge-reranker-base",
        model_kwargs={"device": device}
    )

    reranker = CrossEncoderReranker(
        model=cross_encoder_model,
        top_n=5
    )

    retriever = ContextualCompressionRetriever(
        base_retriever=hybrid,
        base_compressor=reranker
    )

    # Initialize LLM
    llm = ChatGroq(temperature=0, model_name="llama3-8b-8192")

    # Create interpretation prompt
    interpretation_prompt = ChatPromptTemplate.from_template("""
You are a thoughtful and emotionally intelligent Dream Interpreter AI.

Based only on the dream and the symbolic material provided below, write a single, cohesive interpretation in natural, human-sounding English.

Instructions:
- Speak to the dreamer as if you’re helping them understand themselves — be warm, reflective, and psychologically insightful.
- Do not summarize or repeat the dream; go straight into what it might mean.
- Avoid generic or filler phrases like “this dream can be interpreted in various ways” or “it appears that.” Speak with quiet confidence.
- Do not use document numbers, citations, or academic references.
- Refrain from repeating the same idea with slightly different wording (e.g., don’t say “repressed” and then repeat it as “hidden aspects” right after).
- If multiple symbolic elements (like snake, desert, chase) are involved, tie them together in a fluid way that reflects emotional or psychological conflict.
- Bring emotional tone into the interpretation — fear, shame, desire, hope — based on what the symbols suggest.
- End with a gentle but honest reflection — what might this dream be inviting the dreamer to explore or face?

Symbolic Material:
{context}

Dream to Interpret:
{dream}

Final Interpretation:""")

    # Create the interpretation chain
    interpretation_chain = interpretation_prompt | llm | StrOutputParser()

    summary_prompt = PromptTemplate.from_template(
        "Summarize this dream interpretation into a short, visual scene description (1–2 lines) for a video generation model:\n\n{interpretation}\n\nScene:"
    )

    # The summary chain reuses the same ChatGroq instance.
    summary_chain = summary_prompt | llm | StrOutputParser()

    # Report success only after every global has been assigned.
    print("Dream Interpreter initialized successfully!")

# Apply nest_asyncio's patch to asyncio for this kernel.
nest_asyncio.apply()
# Read the fal.ai key from Colab secrets and expose it through the FAL_KEY
# environment variable.
FAL_KEY = userdata.get('FAL_AI_KEY')
os.environ["FAL_KEY"] = FAL_KEY
# NOTE(review): `client` is not referenced by any code visible in this notebook
# (video_generate calls fal_client.run_async directly) — confirm it is needed.
client = fal_client.AsyncClient()

async def video_generate(prompt_text: str, duration: int = 5):
    """Request a video from the fal text-to-video endpoint and return its URL.

    Args:
        prompt_text: Scene description sent as the ``prompt`` argument.
        duration: Value sent as the ``duration`` argument.

    Returns:
        The response's ``video_url`` when it is present and truthy; otherwise
        the ``url`` entry of the response's ``video`` mapping, or ``None``
        when neither is available.
    """
    request_arguments = {"prompt": prompt_text, "duration": duration}
    response = await fal_client.run_async(
        "fal-ai/fast-svd/text-to-video",
        arguments=request_arguments,
    )

    # Prefer a top-level URL; fall back to the nested video.url form.
    top_level_url = response.get("video_url")
    if top_level_url:
        return top_level_url
    return response.get("video", {}).get("url")

# Example usage:
def run_pipeline():
    """Initialise the interpreter, interpret one sample dream and print the results.

    Returns:
        The video prompt produced for the sample dream.
    """
    initialize_dream_interpreter()

    sample_dream = "I dreamed I was flying over a dark forest, then suddenly fell into water"
    interpretation, video_prompt = interpret_dream(sample_dream)

    # Print each result under its own heading.
    for heading, text in (
        ("Dream Interpretation:", interpretation),
        ("Video Prompt:", video_prompt),
    ):
        print(heading)
        print(text)

    return video_prompt

# Run the example end to end: build the pipeline, interpret the sample dream,
# then request a video for the resulting prompt. The top-level `await` relies
# on the notebook runtime; it is not valid in a plain Python script.
video_prompt = run_pipeline()
await video_generate(video_prompt)

  embeddings = HuggingFaceBgeEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/34.1k [00:00<?, ?B/s]

Dream Interpreter initialized successfully!


  relevant_docs = retriever.get_relevant_documents(dream_text)


Dream Interpretation:
Dear one, your dream is a rich tapestry of symbolism, woven from the threads of your deepest desires, fears, and unconscious longings. Let us unravel its meaning together.

The dark forest, where you initially find yourself flying, may represent the unknown territories of your own psyche, where the shadows of your past and present reside. The act of flying, a symbol of freedom and empowerment, suggests that you've been exploring these inner realms, perhaps seeking to understand yourself better.

The sudden fall into water, however, is a jarring and disorienting experience. Water, as we've seen in the symbolic material, is often associated with birth, rebirth, and the unconscious. Your fall into the water may indicate a sense of being overwhelmed by your own emotions, desires, or unconscious impulses. The water's darkness could represent the unknown, the unexplored aspects of yourself that you're struggling to confront.

The combination of these elements may be hin

'https://storage.googleapis.com/isolate-dev-hot-rooster_toolkit_bucket/github_110602490/75b80b7b481243da9674df9ba0023990_reenc-tmpqv9mb5h9.mp4?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=GOOG1EEBKROPDBU3DT4T2J7OT2WSRVO2Y7OAW7FKPOUVTV5DCK4QLAZC7YDAA%2F20250623%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250623T134202Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=6181a9e41d847238e993fe270410e29541e7474c814950e3671adc2c45522e25'

## Video Generation

In [7]:
# Most recent video prompt produced by interpret_dream_ui; generate_video_sync
# reads it when the user asks for a video.
last_video_prompt = ""

def interpret_dream_ui(dream_text):
    """UI callback: interpret the dream and remember its video prompt.

    Args:
        dream_text: Text taken from the dream input box.

    Returns:
        The interpretation text, or a short warning when ``dream_text`` is
        empty or only whitespace. In that case nothing is sent to the
        retriever or LLM and ``last_video_prompt`` is left unchanged.
    """
    global last_video_prompt
    # An empty submission would otherwise be interpreted as if it were a dream.
    if not dream_text or not dream_text.strip():
        return "⚠️ Please describe your dream before asking for an interpretation."
    interpretation, video_prompt = interpret_dream(dream_text)
    last_video_prompt = video_prompt
    return interpretation

def generate_video_sync():
    """UI callback: generate a video for the most recent interpretation.

    Reads the module-level ``last_video_prompt`` and runs the async
    ``video_generate`` coroutine to completion.

    Returns:
        A ``(status_message, video_url)`` tuple; ``video_url`` is ``None``
        when no dream has been interpreted yet or no URL came back.
    """
    if not last_video_prompt:
        return "⚠️ Please interpret a dream first before generating a video.", None
    # asyncio.run creates (and closes) a loop for this call, so it does not
    # depend on the calling thread already having an event loop the way
    # get_event_loop() + run_until_complete() did.
    video_url = asyncio.run(video_generate(last_video_prompt, duration=5))
    if video_url:
        return "🎬 Video generated successfully!", video_url
    return "⚠️ Failed to generate video.", None



## UI & CSS

In [8]:
# Stylesheet passed to gr.Blocks(css=...) in launch_app; the class selectors
# correspond to the elem_classes / HTML classes set on the components there.
# NOTE(review): `.interpretation-output` is styled more than once with
# different `color` values (white first, black in the later `*` rule) — confirm
# which one is intended.

unified_css = """
body {
    background: url('https://images.unsplash.com/photo-1503264116251-35a269479413?auto=format&fit=crop&w=1950&q=80') no-repeat center center fixed;
    background-size: cover;
    font-family: 'Poppins', sans-serif;
    margin: 0;
    padding: 0;
}

/* Gradio container transparent */
.gradio-container {
    background:
        linear-gradient(135deg, #667eea 0%, #764ba2 100%),
        url('https://images.unsplash.com/photo-1503264116251-35a269479413?auto=format&fit=crop&w=1950&q=80');
    background-blend-mode: overlay;
    background-size: cover;
    background-position: center;
    max-width: 1500px !important;
    margin: 0 auto !important;
    min-height: 100vh !important;
    padding-bottom: 2rem;
    color: white !important;
}

/* Translucent panels with blur */
.header-container,
.input-container,
.output-container,
.examples-container {
    background: rgba(255, 255, 255, 0.15) !important;
    backdrop-filter: blur(20px) !important;
    -webkit-backdrop-filter: blur(20px) !important;
    border-radius: 20px !important;
    padding: 2rem !important;
    border: 1px solid rgba(255, 255, 255, 0.15) !important;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.35) !important;
    color: white !important;
}

.output-container .markdown-body {
    background: transparent !important;
    color: white !important;
    padding: 0 !important;
    border: none !important;
    box-shadow: none !important;
}

/* Title & subtitles */
.main-title {
    font-size: 3rem !important;
    font-weight: 700 !important;
    color: white !important;
    margin-bottom: 0.5rem !important;
}
.subtitle {
    font-size: 1.2rem !important;
    color: #e0e0e0 !important;
    margin-bottom: 1rem !important;
}
.examples-title {
    color: black !important;
    font-size: 1.3rem !important;
    font-weight: 600 !important;
    margin-bottom: 1rem !important;
}

/* Textbox */
.dream-input {
    border-radius: 10px !important;
    border: 2px solid rgba(255,255,255,0.25) !important;
    padding: 16px !important;
    font-size: 1rem !important;
    background: rgba(255,255,255,0.55) !important;
    color: white !important;
}
.dream-input::placeholder {
    color: rgba(255,255,255,0.1) !important;
}
.dream-input:focus {
    border-color: #a49cfc !important;
    outline: none !important;
    box-shadow: 0 0 0 3px rgba(164, 156, 252, 0.25) !important;
}

/* Output */
.interpretation-output {
    background: rgba(0, 0, 0, 0.3) !important;
    border-radius: 12px !important;
    padding: 20px !important;
    border-left: 4px solid #9b7cf0 !important;
    font-size: 1rem !important;
    line-height: 1.6 !important;
    color: white !important;
}

/* Button */
.interpret-btn {
    background: linear-gradient(45deg, #1e184a, #6a57ff) !important;
    border: none !important;
    border-radius: 25px !important;
    padding: 12px 30px !important;
    font-size: 1.1rem !important;
    font-weight: 600 !important;
    color: white !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 15px rgba(255, 255, 255, 0.15) !important;
}
.interpret-btn:hover {
    transform: scale(1.03) !important;
    box-shadow: 0 6px 25px rgba(255, 255, 255, 0.3) !important;
}

/* Markdown output fix (no black box) */
.interpretation-output,
.interpretation-output > div,
.interpretation-output > div > div,
.interpretation-output * {
    background: transparent !important;
    color: black !important;
    padding: 0 !important;
    border: none !important;
    box-shadow: none !important;
    font-size: 1rem !important;
    line-height: 1.6 !important;
}

/* Optional left border for emphasis */
.interpretation-output {
    border-left: 4px solid #9b7cf0 !important;
    border-radius: 12px !important;
    padding-left: 12px !important;
}
/* Example items */
.example-item {
    background: rgba(255, 255, 255, 0.08) !important;
    border-radius: 10px !important;
    padding: 10px 15px !important;
    margin-bottom: 0.6rem !important;
    cursor: pointer !important;
    color: black !important;
    border: 1px solid rgba(255, 255, 255, 0.2) !important;
    transition: all 0.3s ease !important;
}
.example-item:hover {
    background: linear-gradient(45deg, #1e184a, #6a57ff) !important;
    color: white !important;
    transform: translateX(5px) scale(1.02) !important;
}

/* Footer */
.footer {
    text-align: center;
    padding: 2rem;
    color: white !important;
    font-size: 0.9rem;
    line-height: 1.4;
}
"""


In [9]:
# Video included
def launch_app():
    """Build the Gradio interface and launch it with a public share link.

    The layout has a header, an input column (dream text box plus Interpret,
    Generate Video and Clear buttons), a column of clickable example dreams,
    an output area (interpretation text, video status and video player) and
    a footer. Styling comes from the module-level ``unified_css``.
    """
    with gr.Blocks(css=unified_css) as demo:
        gr.HTML("<script>document.body.classList.add('dark-mode');</script>")

        # HEADER
        with gr.Row(elem_classes="header-container"):
            gr.HTML("""
                <div>
                    <h1 class="main-title">🌙 Dream Interpreter</h1>
                    <p class="subtitle">Unlock the mysteries of your dreams with AI-powered interpretation</p>
                    <p style="font-size: 1rem;">Discover hidden meanings using psychology-informed LLMs trained on Freudian & Jungian literature.</p>
                </div>
            """)

        # INPUT SECTION
        with gr.Row():
            with gr.Column(scale=2):
                with gr.Group(elem_classes="input-container"):
                    gr.HTML("<h3 class='section-heading'>📝 Tell Us About Your Dream</h3>")
                    dream_input = gr.Textbox(
                        placeholder="Describe your dream in detail...",
                        lines=6,
                        max_lines=10,
                        elem_classes="dream-input"
                    )
                    with gr.Row():
                        interpret_btn = gr.Button("✨ Interpret My Dream", elem_classes="interpret-btn")
                        generate_btn = gr.Button("🎥 Generate Video", elem_classes="interpret-btn")
                        clear_btn = gr.Button("🗑️ Clear")

            with gr.Column(scale=1):
                with gr.Group(elem_classes="examples-container"):
                    gr.HTML("<h3 class='examples-title'>💡 Try These Examples</h3>")
                    examples = [
                        "I was flying over a forest and saw a red bird.",
                        "I fell off a cliff into water.",
                        "I walked through endless doors in a strange house.",
                        "I was chased by a snake through a desert.",
                        "I met my younger self in a garden."
                    ]
                    for ex in examples:
                        # x=ex binds the current example at definition time, so
                        # each button fills in its own text.
                        gr.Button(ex, elem_classes="example-item").click(lambda x=ex: x, outputs=dream_input)

        # OUTPUT SECTION
        with gr.Row():
            with gr.Column():
                with gr.Group(elem_classes="output-container"):
                    gr.HTML("<h3 class='section-heading'>🔮 Your Dream Interpretation</h3>")
                    interpretation_output = gr.Markdown(
                        "Your interpretation will appear here...",
                        elem_classes="interpretation-output"
                    )
                with gr.Group(elem_classes="output-container"):
                    gr.HTML("<h3 class='section-heading'>🎞️ Dream Scene Video</h3>")
                    video_status = gr.Textbox(
                        value="No video generated yet.",
                        interactive=False,
                        show_label=False,
                        elem_classes="interpretation-output"
                    )
                    video_output = gr.Video(label="Dream Scene")

        # FOOTER
        gr.HTML("""
        <div class="footer">
            <p style="color: white;">🌟 Powered by LangChain, HuggingFace, and Groq LLMs</p>
            <p style="font-size: 0.8rem; margin-top: 0.5rem; color: white;">
                Interpretations are not medical advice. Use them for self-reflection and insight ✨
            </p>
        </div>
        """)

        def _reset_ui():
            """Clear every input/output and forget the stored video prompt."""
            # Without this, Clear left the previous video on screen and
            # "Generate Video" kept using the prompt of the cleared dream.
            global last_video_prompt
            last_video_prompt = ""
            return "", "", "No video generated yet.", None

        # ACTIONS
        interpret_btn.click(fn=interpret_dream_ui, inputs=dream_input, outputs=interpretation_output)
        generate_btn.click(fn=generate_video_sync, outputs=[video_status, video_output])
        clear_btn.click(
            _reset_ui,
            outputs=[dream_input, interpretation_output, video_status, video_output],
        )
        dream_input.submit(fn=interpret_dream_ui, inputs=dream_input, outputs=interpretation_output)

    demo.launch(share=True)


## Launch The App

In [10]:
# Build the UI and start the Gradio server (launch_app calls demo.launch(share=True)).
launch_app()

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a4c6da76daa778d304.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
