In [None]:
# ===== Install necessary packages (Run this once) =====
!pip install torch torchvision pillow bitsandbytes sentencepiece \
huggingface_hub fairscale fire blobfile \
transformers accelerate requests scikit-learn pandas \
streamlit

In [None]:
# ===== Import Libraries =====
import pandas as pd
import smtplib
from email.mime.text import MIMEText
import torch
import torch.nn.functional as F
from PIL import Image
import requests
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, CLIPProcessor, CLIPModel, pipeline
from datetime import datetime
import os
import pickle
import pandas as pd
from sklearn.cluster import KMeans
import logging
import streamlit as st

In [None]:
# ===== Device Management =====
# Use the GPU whenever CUDA reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
# Authenticate against the Hugging Face Hub.
import os
from huggingface_hub import login

# The token is taken from the HF_TOKEN environment variable (None when it is unset).
login(token=os.environ.get('HF_TOKEN'))

In [None]:
# ===== Load Models =====

In [None]:
# LLaMA-3 Vision Model
# Llama 3.2 Vision checkpoints use the Mllama architecture. The image-conditioned
# model class is MllamaForConditionalGeneration; AutoModelForCausalLM does not
# load the vision-capable model that run_visual_chat needs.
from transformers import MllamaForConditionalGeneration

model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    # accelerate decides where the weights live; the model must not be moved
    # with .to() afterwards (doing so fails when modules are offloaded).
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# CLIP for similarity
# Processor and model are loaded from the same checkpoint; the model is then
# moved onto the device chosen in the Device Management cell.
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device)

In [None]:
# Object detection pipeline (DETR)
# Pass the device selected in the Device Management cell so the detector runs
# on the same hardware as the CLIP model instead of the pipeline default.
object_detector = pipeline("object-detection", model="facebook/detr-resnet-50", device=device)

In [None]:
# Emotion classifier
# Pass the device selected in the Device Management cell so the classifier runs
# on the same hardware as the CLIP model instead of the pipeline default.
emotion_classifier = pipeline('image-classification', model='trpakov/vit-face-expression', device=device)

In [None]:
# Helper function
def load_image_from_url(url, timeout=10):
    """Download an image and return it as an RGB PIL image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        The decoded image converted to "RGB", or None when the download or
        the decoding fails (the error is printed).
    """
    from io import BytesIO  # local import keeps this cell self-contained

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # response.content is the complete, content-decoded body. Decoding from
        # memory avoids handing PIL the undecoded raw socket stream and lets
        # the connection be released as soon as the download finishes.
        return Image.open(BytesIO(response.content)).convert("RGB")
    except (requests.exceptions.RequestException, OSError) as e:
        # OSError covers PIL failures (unidentified or truncated image data),
        # so a non-image response also yields None instead of an exception.
        print(f"Failed to load image: {e}")
        return None

In [None]:
def run_visual_chat(images, messages, max_new_tokens=128):
    """Run one chat turn through the vision-language model and return its reply.

    Args:
        images: Image(s) passed to the processor, or None for a text-only prompt.
        messages: Chat messages in the structure accepted by
            processor.apply_chat_template.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The newly generated text only (the prompt is not echoed back),
        with surrounding whitespace stripped.
    """
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    # The rendered chat template already carries the special tokens (per the
    # Llama 3.2 Vision usage example), so the processor must not add them a
    # second time.
    inputs = processor(
        images=images, text=input_text, add_special_tokens=False, return_tensors="pt"
    ).to(device)
    prompt_length = inputs["input_ids"].shape[-1]
    with torch.inference_mode():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # generate() returns prompt tokens followed by the continuation; keep only
    # the continuation so the caller receives just the model's answer.
    generated_ids = output[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

In [None]:
def batch_process_images(url_list, task_function):
    """Apply task_function to every URL and collect the outcomes.

    Args:
        url_list: Iterable of image URLs.
        task_function: Callable invoked once per URL with that URL as its
            only argument.

    Returns:
        A dict mapping each URL to whatever task_function returned for it.
    """
    return {image_url: task_function(image_url) for image_url in url_list}

In [None]:
def cache_embedding(url, embedding, cache_file='embeddings_cache.pkl'):
    """Store an embedding under its URL in a pickle-backed cache file.

    The whole cache is read, updated with the new entry and written back.

    Args:
        url: Key under which the embedding is stored.
        embedding: Any picklable object to cache.
        cache_file: Path of the pickle file holding the cache dict.
    """
    cache = {}
    if os.path.exists(cache_file):
        try:
            with open(cache_file, 'rb') as f:
                # SECURITY: pickle.load can execute arbitrary code; only point
                # cache_file at files this notebook wrote itself.
                cache = pickle.load(f)
        except (EOFError, pickle.UnpicklingError) as e:
            # An empty or truncated file (e.g. from an interrupted write) is
            # treated as "no cache yet" instead of aborting the run.
            print(f"Ignoring unreadable embedding cache {cache_file}: {e}")
            cache = {}
    cache[url] = embedding
    # Write to a sibling temp file and swap it in, so an interruption during
    # the dump cannot leave a half-written cache behind.
    tmp_file = f"{cache_file}.tmp"
    with open(tmp_file, 'wb') as f:
        pickle.dump(cache, f)
    os.replace(tmp_file, cache_file)

In [None]:
def summarize_batch_results(results_dict):
    """Ask the chat model for a summary of per-image results and print it.

    Args:
        results_dict: Mapping of image URL to that image's analysis result;
            each entry becomes one "url: result" line of the prompt.
    """
    entry_lines = []
    for url, result in results_dict.items():
        entry_lines.append(f"{url}: {result}")
    combined_text = "\n".join(entry_lines)
    user_turn = {"role": "user", "content": f"Summarize these image analyses:\n{combined_text}"}
    # Text-only request: no images are passed to the chat helper.
    summary = run_visual_chat(None, [user_turn], max_new_tokens=256)
    print("Batch Summary:\n", summary)

In [None]:
def cluster_images(urls, n_clusters=3, random_state=None):
    """Cluster images by their embeddings with KMeans and print the assignment.

    get_image_embedding is not defined in this part of the notebook; it is
    assumed to return a torch tensor, or None when no embedding is available.

    Args:
        urls: Iterable of image URLs to cluster.
        n_clusters: Desired number of clusters; reduced to the number of
            usable embeddings when fewer are available.
        random_state: Optional seed forwarded to KMeans for reproducible
            clusters.
    """
    clustered_urls = []
    embeddings = []
    for url in urls:
        # Compute each embedding once; the URL list is filtered together with
        # the embeddings so labels line up with the right URL.
        embedding = get_image_embedding(url)
        if embedding is None:
            continue
        clustered_urls.append(url)
        embeddings.append(embedding.cpu().numpy().flatten())

    if not embeddings:
        print("No image embeddings available to cluster.")
        return

    # KMeans rejects more clusters than samples.
    effective_clusters = min(n_clusters, len(embeddings))
    kmeans = KMeans(n_clusters=effective_clusters, random_state=random_state).fit(embeddings)
    clusters = {url: int(label) for url, label in zip(clustered_urls, kmeans.labels_)}
    print("Image clusters:", clusters)

In [None]:
def detect_objects(image_url):
    """Load the image at image_url and print the object detector's output.

    The detector is not run when the image loader returns a falsy value
    (it returns None on failure).
    """
    image = load_image_from_url(image_url)
    if not image:
        return
    print("Detections:", object_detector(image))

In [None]:
def detect_emotions(image_url):
    """Load the image at image_url and print the emotion classifier's output.

    The classifier is not run when the image loader returns a falsy value
    (it returns None on failure).
    """
    image = load_image_from_url(image_url)
    if not image:
        return
    predicted = emotion_classifier(image)
    print("Detected Emotions:", predicted)