<a href="https://colab.research.google.com/github/Chinmaysahoo03/Multimodal_GenAI_Assistant/blob/main/Multimodal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain-groq transformers torch diffusers accelerate pillow

In [None]:
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
from IPython.display import display
import warnings
# NOTE: "ignore" with no category/module filter silences *every* Python warning
# (deprecation notices included), not only minor ones. Done for cleaner output;
# remove this line while debugging.
warnings.filterwarnings("ignore")

# Chat backend credentials: the Groq API key used by the LangChain chat model below.
# `google.colab` is only importable inside Google Colab, so this cell assumes a Colab runtime.
from google.colab import userdata
# Read the key from the Colab secret named 'groq_api' (create it under Colab Secrets
# and grant this notebook access) so the key never appears in the notebook source.
groq_api = userdata.get('groq_api')

from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Chat model: Groq-hosted "gemma2-9b-it", sampled at temperature 0.7.
llm = ChatGroq(
    model="gemma2-9b-it",
    temperature=0.7,
    api_key=groq_api,
)

# The prompt is a plain transcript: prior turns ({history}) followed by the new
# human message ({input}) and an open "Assistant:" line for the model to complete.
prompt_template = PromptTemplate(
    template="{history}\nHuman: {input}\nAssistant:",
    input_variables=["history", "input"],
)

# Bind the prompt to the model; `chain.invoke({...})` fills the template and calls the LLM.
chain = LLMChain(prompt=prompt_template, llm=llm)

# Text-to-image backend: Stable Diffusion v1.4 loaded through diffusers.
# Half precision is used when a CUDA device is present, full precision otherwise.
gpu_available = torch.cuda.is_available()
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16 if gpu_available else torch.float32,
)
if gpu_available:
    # Move the whole pipeline onto the GPU.
    pipe = pipe.to("cuda")
    print("GPU enabled for faster generation!")

print("Models loaded successfully! Groq for chat, Stable Diffusion for images.")

In [None]:
def generate_chat_response(user_input, history="", llm_chain=None):
    """Send one user turn to the chat chain and return the reply plus updated history.

    Args:
        user_input: The user's message for this turn.
        history: Transcript of earlier turns, as returned by a previous call
            (``""`` starts a new conversation).
        llm_chain: Optional object exposing ``invoke(dict)``. Defaults to the
            notebook-level ``chain``; pass a stub to test without calling the API.

    Returns:
        tuple[str, str]: ``(reply, new_history)``. ``reply`` is the model text with
        surrounding whitespace removed; ``new_history`` is ``history`` with this
        turn appended as ``"Human: ...\\nAssistant: ..."``.
    """
    active_chain = chain if llm_chain is None else llm_chain
    result = active_chain.invoke({"input": user_input, "history": history})

    # The chain result is read as a dict holding the text under 'text' (or 'output').
    # Plain strings and message-like objects with a `.content` attribute are also
    # accepted so a different chain type does not crash the lookup.
    if isinstance(result, dict):
        raw_text = result['text'] if 'text' in result else result.get('output', 'No valid response')
    elif isinstance(result, str):
        raw_text = result
    else:
        raw_text = getattr(result, 'content', 'No valid response')

    # Strip once and reuse, so the transcript stores exactly what the caller sees
    # (previously the unstripped text was fed back into later prompts).
    reply = str(raw_text).strip()

    turn = f"Human: {user_input}\nAssistant: {reply}"
    # Avoid a stray leading newline when this is the first turn.
    new_history = f"{history}\n{turn}" if history else turn
    return reply, new_history

# Smoke test: a single chat turn starting from an empty history.
test_input = "Hello, tell me about generative AI."
response, history = generate_chat_response(test_input, history="")
print(f"User: {test_input}")
print(f"Bot: {response}")

In [None]:
def generate_image(prompt, negative_prompt="blurry, low quality", num_steps=50,
                   output_path="generated_image.png", seed=None):
    """Generate one image with the notebook-level ``pipe``, display it and save it.

    Args:
        prompt: Text description of the image to generate.
        negative_prompt: Text describing what the image should avoid.
        num_steps: Number of inference steps passed to the pipeline.
        output_path: File the image is written to. The default keeps the original
            behaviour of overwriting ``generated_image.png`` on every call.
        seed: Optional integer seed. When given, a dedicated generator seeded with
            it is passed to the pipeline so a generation can be repeated; ``None``
            leaves the pipeline's default randomness in place.

    Returns:
        The first image returned by the pipeline.
    """
    # A separate generator keeps the seed local to this call instead of
    # reseeding torch's global random state.
    generator = torch.Generator().manual_seed(seed) if seed is not None else None

    image = pipe(
        prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_steps,
        generator=generator,
    ).images[0]

    # Show inline in the notebook, then persist to disk.
    display(image)
    image.save(output_path)
    print(f"Image saved as {output_path}")
    return image

# Smoke test: generate a single image.
# generate_image() already displays the picture; the trailing ';' stops the cell
# from echoing the returned image a second time as its output value.
test_prompt = "a friendly robot chatting with a human in a futuristic room"
generate_image(test_prompt);

In [None]:
# Interactive multimodal assistant loop.
# Each turn is either an image command ("generate image: <description>") or a chat
# message; after a chat reply the user may ask for an image based on that reply.
IMAGE_COMMAND = 'generate image:'   # case-insensitive prefix that triggers image generation
MAX_IMAGE_PROMPT_CHARS = 100        # how much of a chat reply is reused as an image prompt

print("Multimodal AI Assistant Ready! Type 'quit' to exit. Use 'generate image: [description]' to create an image.")
history = ""  # Running chat transcript, threaded through generate_chat_response

while True:
    # Strip so stray spaces do not defeat the 'quit' / image-command checks.
    user_input = input("\nYou: ").strip()
    if not user_input:
        continue  # Nothing typed: do not spend an LLM call on an empty message
    if user_input.lower() == 'quit':
        break

    # Image generation command
    if user_input.lower().startswith(IMAGE_COMMAND):
        image_prompt = user_input[len(IMAGE_COMMAND):].strip()
        if not image_prompt:
            print("Please add a description after 'generate image:'.")
            continue
        print("Generating image...")
        generate_image(image_prompt)
        continue  # Skip text response for pure image requests

    # Chat turn
    response, history = generate_chat_response(user_input, history)
    print(f"Assistant: {response}")

    # Optional: offer to generate an image based on the reply
    gen_choice = input("Generate an image based on this response? (y/n): ")
    if gen_choice.strip().lower() == 'y':
        # Only truncate; no ellipsis is appended because it would become part of
        # the image prompt even when the reply was short.
        image_prompt = response[:MAX_IMAGE_PROMPT_CHARS]
        print("Generating contextual image...")
        generate_image(image_prompt)

In [None]:
# Scripted, non-interactive walkthrough: two chat turns followed by one image command.
demo_inputs = [
    "Hi, what's generative AI?",
    "Can you describe a scene of AI creating art?",
    "generate image: AI painting a masterpiece",
]

history = ""  # Fresh transcript for the demo conversation
for user_input in demo_inputs:
    if not user_input.lower().startswith('generate image:'):
        # Ordinary chat turn: print the exchange and carry the history forward.
        print(f"\nYou: {user_input}")
        response, history = generate_chat_response(user_input, history)
        print(f"Assistant: {response}")
        print("---")
        continue

    # Image command: everything after the prefix is the image description.
    image_prompt = user_input[len('generate image:'):].strip()
    print(f"\n--- Image Request: {image_prompt} ---")
    generate_image(image_prompt)