In [1]:
from PIL import Image
print("🔥 Pillow imported successfully!")

🔥 Pillow imported successfully!


In [2]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)


CUDA available: True
Device: NVIDIA GeForce RTX 4060 Laptop GPU
CUDA version: 11.8


In [3]:
import torch
import cv2
from ultralytics import YOLO

print("🔥 PyTorch Version:", torch.__version__)
print("🔥 OpenCV Version:", cv2.__version__)

model = YOLO("yolov8n.pt")
print("🔥 YOLO Model Loaded Successfully!")


🔥 PyTorch Version: 2.6.0+cu118
🔥 OpenCV Version: 4.11.0
🔥 YOLO Model Loaded Successfully!


In [3]:
import torch
import cv2
from ultralytics import YOLO

# Print version info
print("🔥 PyTorch Version:", torch.__version__)
print("🔥 OpenCV Version:", cv2.__version__)

# Load YOLOv8 model
model = YOLO("yolov8n.pt")  # Nano model for fast performance
print("✅ YOLO Model Loaded Successfully!")

# Start webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        break

    # Run YOLO model on the frame
    results = model(frame)

    # Draw results on frame
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            label = result.names[int(box.cls[0])]
            confidence = float(box.conf[0])

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{label}: {confidence:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("YOLOv8 Real-Time Detection", frame)

    # Exit on pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


🔥 PyTorch Version: 2.2.2+cpu
🔥 OpenCV Version: 4.11.0
✅ YOLO Model Loaded Successfully!

0: 480x640 (no detections), 78.0ms
Speed: 1.6ms preprocess, 78.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 59.7ms
Speed: 1.3ms preprocess, 59.7ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 53.9ms
Speed: 1.5ms preprocess, 53.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 59.5ms
Speed: 1.1ms preprocess, 59.5ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 60.3ms
Speed: 1.2ms preprocess, 60.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 59.4ms
Speed: 1.2ms preprocess, 59.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 57.4ms
Speed: 1.0ms preprocess, 57.4ms inference, 0.6ms postprocess per image at shap

In [4]:
import tensorflow as tf
#print(tf.__version__)  # Should print TensorFlow version
print(dir(tf.keras))  # Should list keras functions if installed correctly


['Input', 'Model', 'Sequential', '__builtins__', '__cached__', '__doc__', '__file__', '__internal__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_sys', 'activations', 'applications', 'backend', 'callbacks', 'constraints', 'datasets', 'dtensor', 'estimator', 'experimental', 'initializers', 'layers', 'losses', 'metrics', 'mixed_precision', 'models', 'optimizers', 'preprocessing', 'regularizers', 'utils', 'wrappers']


In [5]:
# Get yolo 8n from curl -L -O https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-face.pt

In [8]:
import torch
import cv2
from ultralytics import YOLO
from fer import FER
import numpy as np

# Load YOLOv8 face detection model
# Make sure "yolov8n-face.pt" is in your working directory or provide the correct path
model = YOLO("yolov8n-face-lindevs.pt")

# Load FER emotion detection model
emotion_detector = FER(mtcnn=True)

# Start webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        break

    # Run YOLO face detection
    results = model(frame)

    # Process YOLO results
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Extract face region
            face = frame[y1:y2, x1:x2]

            # Ensure face is large enough to process
            if face.shape[0] < 20 or face.shape[1] < 20:
                continue  # Skip too-small detections

            # Convert face to RGB for FER
            face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)

            # Predict emotion
            emotion, score = emotion_detector.top_emotion(face_rgb)

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Display emotion label or 'No emotion'
            if emotion is None or score is None:
                cv2.putText(frame, "No emotion", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            else:
                cv2.putText(frame, f"{emotion} ({score:.2f})", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("YOLOv8 Real-Time Emotion Detection", frame)

    # Exit on pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 1 face, 48.6ms
Speed: 1.9ms preprocess, 48.6ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 43.3ms
Speed: 1.3ms preprocess, 43.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 44.2ms
Speed: 1.7ms preprocess, 44.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 43.2ms
Speed: 1.6ms preprocess, 43.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 39.0ms
Speed: 1.4ms preprocess, 39.0ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 40.8ms
Speed: 1.3ms preprocess, 40.8ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 44.9ms
Speed: 1.3ms preprocess, 44.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 40.3ms
Speed: 1.4ms preprocess, 40.3ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x

In [4]:
import torch
from diffusers import StableDiffusionPipeline
import cv2
import numpy as np

# Set device: use CUDA if available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Stable Diffusion pipeline from Hugging Face
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
pipe.to(device)

# Define a mapping from emotions to text prompts
emotion_to_prompt = {
    "happy": "A vibrant abstract painting representing joy and happiness",
    "sad": "A somber abstract painting representing melancholy and sadness",
    "angry": "A fiery, intense abstract painting representing anger and fury",
    "surprised": "A dynamic, colorful abstract painting representing surprise and excitement",
    "neutral": "A calm and balanced abstract painting representing neutrality"
}

# Cache for generated art to avoid regenerating for the same emotion repeatedly
generated_art = {}

def generate_art_for_emotion(emotion):
    # Retrieve prompt based on emotion; default to a generic abstract prompt if unknown
    prompt = emotion_to_prompt.get(emotion, "An abstract painting")
    print(f"Generating art for emotion: {emotion} with prompt: {prompt}")
    # Generate the image (this might take a while, especially on CPU)
    result = pipe(prompt, guidance_scale=7.5, num_inference_steps=50)
    art_image = result.images[0]
    return art_image

# Simulate detected emotion from your YOLO+FER pipeline:
detected_emotion = "happy"  # This value should come from your emotion detection code

# Check if art for this emotion has already been generated; if not, generate it
if detected_emotion not in generated_art:
    generated_art[detected_emotion] = generate_art_for_emotion(detected_emotion)

# Convert the generated PIL image to a format OpenCV can display
art_image = np.array(generated_art[detected_emotion])
# Stable Diffusion returns images in RGB format; convert to BGR for OpenCV
art_image = cv2.cvtColor(art_image, cv2.COLOR_RGB2BGR)

# Display the generated art in a new window
cv2.imshow("Generated Art", art_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


  from .autonotebook import tqdm as notebook_tqdm
Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  9.52it/s]


Generating art for emotion: happy with prompt: A vibrant abstract painting representing joy and happiness


100%|██████████| 50/50 [00:44<00:00,  1.12it/s]
