In [6]:
!pip install streamlit
!streamlit run app.py &>/dev/null &
!cloudflared tunnel --url http://localhost:8501 &>/dev/null &

!pip install streamlit pyngrok
!pip install streamlit Pillow

!pip install -q -U google-generativeai
!pip install git+https://github.com/openai/CLIP.git
!pip install deepface

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-i9asxeog
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-i9asxeog
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [7]:
import os

# Define userdata with 'ngrok' key
userdata = {'ngrok': ''}  # Replace YOUR_NGROK_AUTHTOKEN with your actual token

os.environ['ngrok_token'] = userdata.get('ngrok')
!ngrok authtoken $ngrok_token

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [8]:
%%writefile app.py
import os
import tempfile

from deepface import DeepFace
import cv2
import numpy as np
from PIL import Image
import torch
import torchvision
from torchvision import transforms as T
from transformers import BlipProcessor, BlipForConditionalGeneration
import clip
import matplotlib.pyplot as plt
import google.generativeai as genai

# Configure Generative AI.
# The API key is read from the GOOGLE_API_KEY environment variable so that no
# credential has to be written into this file; when the variable is unset the
# key is the empty string, exactly as it was when hard-coded.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
gemini_model = genai.GenerativeModel("gemini-1.5-flash")


# Set a custom cache directory for Hugging Face, unless the environment already
# provides one (an existing HF_HOME is no longer overwritten).
# NOTE(review): the default is a Windows drive path; on other systems it is
# presumably treated as a relative directory — confirm it is still wanted.
# NOTE(review): transformers is imported before this line runs, so the library
# may already have resolved its cache location — confirm this setting takes effect.
os.environ.setdefault("HF_HOME", "D:/huggingface_cache")


# COCO dataset class labels.
# The list is indexed with the integer label ids returned by the detector
# (see get_prediction), so the position of every entry matters: inserting,
# removing or reordering an entry changes which name each id maps to.
# NOTE(review): '__background__' looks like the placeholder for id 0 and the
# 'N/A' entries like padding for ids that have no category — confirm against
# the metadata of the detection weights before editing.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A',
    'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
    'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A',
    'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cellphone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush'
]
# Choose the device once; CLIP and BLIP are placed on it below.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Pre-trained Faster R-CNN detector, switched to inference mode.
# It is not moved to `device`; get_prediction feeds it tensors as created.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')
model.eval()

# CLIP (ViT-B/32): the model plus its matching image preprocessing transform.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

# BLIP captioning: processor (pre/post-processing) and generation model.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = blip_model.to(device)
def get_gender(img, box):
    """Classify the person inside ``box`` as "man", "woman" or "person".

    Args:
        img: Image array indexed as ``img[y, x]``; ``None`` is tolerated.
        box: ``[(x1, y1), (x2, y2)]`` pixel corners of the person.

    Returns:
        "man" or "woman" when DeepFace reports a gender for the crop, and
        "person" otherwise (missing image, empty crop, unknown label, or any
        failure inside DeepFace).
    """
    if img is None:
        return "person"

    (x1, y1), (x2, y2) = box[0], box[1]
    cropped_img = img[y1:y2, x1:x2]
    if cropped_img.size == 0:
        # Degenerate box: there are no pixels to analyze.
        return "person"

    try:
        result = DeepFace.analyze(cropped_img, actions=['gender'], enforce_detection=False)
        # Accept both a list of per-face results and a single result dict.
        face = result[0] if isinstance(result, (list, tuple)) else result
        gender = str(face['dominant_gender']).strip().lower()
    except Exception:
        # Gender is a best-effort refinement of the label; a failure here must
        # not abort detection, so fall back to the neutral label.
        return "person"

    # DeepFace labels are "Man"/"Woman"; "Male"/"Female" are accepted as well
    # so the mapping works whichever spelling the library reports.
    if gender in ("man", "male"):
        return "man"
    if gender in ("woman", "female"):
        return "woman"
    return "person"

def get_prediction(img_path, threshold=0.6, max_objects=8):
    """Detect objects in an image and return their boxes and labels.

    Args:
        img_path: Path of the image file to analyze.
        threshold: Detections are kept only when their score is strictly
            greater than this value.
        max_objects: Maximum number of detections returned.

    Returns:
        ``(boxes, labels)``: two lists of equal length. Each box is
        ``[(x1, y1), (x2, y2)]`` in integer pixel coordinates; each label is a
        name from ``COCO_INSTANCE_CATEGORY_NAMES``, except that "person" is
        replaced by whatever ``get_gender`` returns for that box.
    """
    img = Image.open(img_path).convert("RGB")
    img_tensor = T.ToTensor()(img)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        pred = model([img_tensor])[0]

    raw_boxes = pred['boxes'].cpu().numpy()
    scores = pred['scores'].cpu().numpy()
    label_ids = pred['labels'].cpu().numpy()

    # Stop early once enough detections are collected, so the (expensive)
    # gender analysis is not run on detections that would be discarded anyway.
    limit = max_objects if isinstance(max_objects, int) and max_objects >= 0 else None

    bgr_img = None  # built lazily, and at most once, when a person is found
    filtered_boxes, filtered_labels = [], []
    for score, raw_box, label_id in zip(scores, raw_boxes, label_ids):
        if limit is not None and len(filtered_boxes) >= limit:
            break
        if not score > threshold:
            continue

        box = [(int(raw_box[0]), int(raw_box[1])), (int(raw_box[2]), int(raw_box[3]))]
        label = COCO_INSTANCE_CATEGORY_NAMES[label_id]
        if label == "person":
            if bgr_img is None:
                # Crop from the very pixels the detector saw (channels flipped
                # to BGR order) instead of decoding the file a second time, so
                # the box coordinates are guaranteed to match the image.
                bgr_img = np.ascontiguousarray(np.asarray(img)[:, :, ::-1])
            label = get_gender(bgr_img, box)
        filtered_boxes.append(box)
        filtered_labels.append(label)

    return filtered_boxes[:max_objects], filtered_labels[:max_objects]
def generate_caption_blip(img_path):
    """Return the BLIP-generated caption for the image stored at ``img_path``."""
    rgb_image = Image.open(img_path).convert("RGB")

    # Pre-process into PyTorch tensors and move them next to the model.
    model_inputs = blip_processor(rgb_image, return_tensors="pt")
    model_inputs = model_inputs.to(device)

    generated_ids = blip_model.generate(**model_inputs)
    first_sequence = generated_ids[0]
    return blip_processor.decode(first_sequence, skip_special_tokens=True)

def generate_caption_clip(img_path):
    """Return the COCO category name that CLIP matches best to the image.

    Every real category is scored with the prompt "a photo of a <category>"
    and the best-scoring category name is returned.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        A category name from ``COCO_INSTANCE_CATEGORY_NAMES``; the placeholder
        entries ('__background__', 'N/A') are never returned.
    """
    # Placeholder entries are not real categories and must not be candidates.
    candidates = [c for c in COCO_INSTANCE_CATEGORY_NAMES if c not in ('__background__', 'N/A')]

    # Preprocessing reads the pixels, so the file can be closed right after it.
    with Image.open(img_path) as pil_image:
        image = clip_preprocess(pil_image).unsqueeze(0).to(device)
    text_inputs = clip.tokenize([f"a photo of a {c}" for c in candidates]).to(device)

    with torch.no_grad():
        image_features = clip_model.encode_image(image)
        text_features = clip_model.encode_text(text_inputs)

        # Compare directions only (cosine similarity): without normalization the
        # ranking would also depend on the magnitude of each text embedding.
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)

    top_class = similarity.argmax().item()
    return candidates[top_class]

def generate_story_with_gemini(scene_description, genre, detected_objects, blip_caption, clip_caption):
    """Generate a story from the scene description, detections and captions.

    Args:
        scene_description: Sentence(s) describing the scene.
        genre: Genre name, matched case-insensitively ("horror", "comedy",
            "fantasy", "romance", "sci-fi", "adventure"; "science fiction" is
            accepted for "sci-fi"). Anything else, including a non-string
            value, selects a generic prompt.
        detected_objects: Iterable of detected object labels.
        blip_caption: Caption produced by BLIP.
        clip_caption: Caption produced by CLIP.

    Returns:
        The generated story with surrounding whitespace removed.

    Raises:
        Whatever the Gemini client raises for a failed request; reading
        ``response.text`` may presumably also fail when the response carries
        no text — handle both at the call site.
    """
    genre_prompts = {
        "horror": "Write a chilling horror story. Focus on suspense, fear, and eerie details.",
        "comedy": "Write a humorous comedy story. Use light-hearted situations and playful dialogue.",
        "fantasy": "Write an imaginative fantasy story. Include magic, mythical creatures, or epic adventures.",
        "romance": "Write a romantic story. Focus on love, emotions, and heartfelt moments.",
        "sci-fi": "Write a futuristic science fiction story. Include advanced technology or space exploration.",
        "adventure": "Write an adventurous story. Focus on thrilling journeys and daring challenges."
    }
    genre_aliases = {"science fiction": "sci-fi"}

    # Normalize the genre so "Horror" and " horror " both work, and so an
    # unhashable value (e.g. a list) selects the fallback instead of raising.
    genre_key = genre.strip().lower() if isinstance(genre, str) else None
    genre_key = genre_aliases.get(genre_key, genre_key)
    genre_prompt = genre_prompts.get(genre_key, "Write an interesting story.")

    prompt = (
        f"{genre_prompt} "
        f"The scene is: {scene_description}. "
        f"Objects detected include: {', '.join(map(str, detected_objects))}. "
        f"BLIP caption: {blip_caption}. "
        f"CLIP caption: {clip_caption}."
    )

    # Reuse the client created at module level instead of building one per call.
    response = gemini_model.generate_content(prompt)
    return response.text.strip()

import streamlit as st


def object_detection_api_with_action_story(img_path, threshold=0.6, max_objects=8, genre="general", save_path=None):
    """Detect objects in an image, caption it, and generate a story about it.

    Args:
        img_path: Path of the image file to analyze.
        threshold: Minimum detection score, forwarded to ``get_prediction``.
        max_objects: Maximum number of detections, forwarded to ``get_prediction``.
        genre: Genre name forwarded to ``generate_story_with_gemini``. A
            non-string value is replaced by "general". (The previous default
            was a list of genre names, which could not be used as a genre.)
        save_path: Optional file path; when given, the story is also written
            there as UTF-8 text.

    Returns:
        The generated story, or ``None`` when no object was detected.
    """
    boxes, labels = get_prediction(img_path, threshold, max_objects)
    if not boxes:
        print("No objects detected.")
        return None

    if not isinstance(genre, str):
        genre = "general"

    blip_caption = generate_caption_blip(img_path)
    clip_caption = generate_caption_clip(img_path)

    scene_description = f"In this scene, we see {', '.join(labels)}. {blip_caption}. Additionally, {clip_caption}."
    print(f"Scene Description: {scene_description}")

    story = generate_story_with_gemini(scene_description, genre, labels, blip_caption, clip_caption)

    # Saving used to sit after the return statement and could never run; it is
    # now an explicit opt-in so existing callers see no new files appear.
    if save_path is not None:
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(story)
        print(f"Story saved as {save_path}")

    return story

# ---------------------------------------------------------------------------
# Streamlit app
# ---------------------------------------------------------------------------

# Page configuration: browser-tab title and wide layout.
# This is the first st.* call in the script.
# NOTE(review): Streamlit has required set_page_config to precede every other
# Streamlit command — confirm for the installed version before moving it.
st.set_page_config(page_title="Art of Visual Storytelling", layout="wide")

# Page banner. The <style> element defines the .header, .footer and
# .genre-description CSS classes, and the <div class="header"> below it renders
# the title and tagline. unsafe_allow_html=True is passed so the raw HTML/CSS
# is rendered rather than shown as text.
st.markdown(
    """
    <style>
    .header {
        background-color: #6C63FF;
        padding: 20px;
        border-radius: 10px;
        text-align: center;
    }
    .header h1 {
        color: white;
        font-family: 'Arial', sans-serif;
    }
    .header p {
        color: #F1F1F1;
        font-size: 18px;
        font-style: italic;
    }
    .footer {
        text-align: center;
        margin-top: 50px;
        font-size: 14px;
        color: gray;
    }
    .genre-description {
        color: gray;
        font-size: 12px;
        margin-top: -10px;
    }
    </style>
    <div class="header">
        <h1>The Art of Visual Storytelling</h1>
        <p>Transform images into captivating stories across genres!</p>
    </div>
    """,
    unsafe_allow_html=True,
)

# Sidebar for genre selection with enhanced design
st.sidebar.title("✨ Choose Your Story's Genre:")

# Maps the label shown in the dropdown to the genre key that is passed to
# object_detection_api_with_action_story (and on to the story prompt lookup).
# "general" has no entry in the prompt table of generate_story_with_gemini, so
# it selects that function's generic fallback prompt.
# NOTE(review): "Historical" maps to the "fantasy" prompt although its sidebar
# description talks about history — confirm this is intended.
genres = {
    "Horror": "horror",
    "Action": "adventure",
    "Romance": "romance",
    "Comedy": "comedy",
    "Historical": "fantasy",
    "Science Fiction": "sci-fi",
    "General": "general",
}

# Genre descriptions with icons, keyed by the same labels as `genres`:
# "desc" is the blurb shown under the dropdown, "color" the background of its card.
genre_details = {
    "Horror": {"desc": "🕷️ Spooky tales to keep you up at night!", "color": "#FF4B4B"},
    "Action": {"desc": "🔥 Thrills and heroic adventures await!", "color": "#FF9900"},
    "Romance": {"desc": "❤️ Heartwarming stories of love and emotions.", "color": "#FF66CC"},
    "Comedy": {"desc": "😂 Light-hearted fun to make you smile!", "color": "#FFFF66"},
    "Historical": {"desc": "📜 Relive moments from the pages of history.", "color": "#66CCFF"},
    "Science Fiction": {"desc": "🚀 Explore futuristic worlds and advanced tech.", "color": "#66FF99"},
    "General": {"desc": "✨ Stories that suit any theme or mood.", "color": "#C0C0C0"},
}

# Render genres in a dropdown; the returned value is one of the keys of `genres`.
selected_genre = st.sidebar.selectbox(
    "Select your genre:",
    list(genres.keys()),
    help="Choose a genre to get started on your story."
)

# Show a card for the selected genre, using that genre's color as background
# and its description as body text (always rendered in white).
st.sidebar.markdown(
    f"""
    <div style='background-color: {genre_details[selected_genre]["color"]};
                padding: 10px;
                border-radius: 10px;
                text-align: center;'>
        <b>{selected_genre}</b>
        <p style='font-size: 14px; color: white;'>{genre_details[selected_genre]["desc"]}</p>
    </div>
    """,
    unsafe_allow_html=True,
)
# File uploader
st.write("Upload the image you want your story to be based on:")
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])

# Add interactive image preview
if uploaded_file is not None:
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Generate button
    if st.button("✨ Generate Story ✨"):
        # Save the upload to a unique temporary file: a fixed file name would be
        # shared (and overwritten or deleted) by every session using the app at
        # the same time. The original extension is kept so the file name
        # matches the actual image format.
        suffix = os.path.splitext(uploaded_file.name)[1] or ".jpg"
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            img_path = tmp_file.name

        try:
            # Call the object detection and story generation API
            with st.spinner("Generating your story..."):
                story = object_detection_api_with_action_story(
                    img_path, threshold=0.6, max_objects=8, genre=genres[selected_genre]
                )

            if story:
                st.success(f"🎉 {selected_genre} Story Generated!🚀")
                st.text_area("Generated Story:", story, height=300)
            else:
                # The API returns None when nothing was detected; reporting
                # success with an empty story would be misleading.
                st.warning("No objects were detected in the image, so no story could be generated.")
        except Exception as e:
            st.error(f"Error generating the story: {e}")
        finally:
            # Remove the temp image after processing, whatever the outcome.
            if os.path.exists(img_path):
                os.remove(img_path)

# Add a footer with credits
st.markdown(
    """
    <div class="footer">
        Created with ❤️ using Streamlit | Powered by AI
    </div>
    """,
    unsafe_allow_html=True,
)

Overwriting app.py


In [9]:
from pyngrok import ngrok

# Port the tunnel forwards to; it must be the port the Streamlit server listens on.
STREAMLIT_PORT = 8501

# Kill existing tunnels if any, so re-running this cell does not stack tunnels.
ngrok.kill()

# Create a tunnel. `public_url` is the tunnel object returned by pyngrok; its
# `public_url` attribute is the address to open, so print that rather than the
# object's full repr.
public_url = ngrok.connect(STREAMLIT_PORT)
print(f"Access your app at {public_url.public_url}")

Access your app at NgrokTunnel: "https://c771-35-187-150-237.ngrok-free.app" -> "http://localhost:8501"


In [10]:
!streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8502[0m
[34m  External URL: [0m[1mhttp://35.187.150.237:8502[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m
