# **Seminararbeit**
von Leon Lantz

## **🚀 Bibliotheken-Import und CUDA-Verfügbarkeit**

In [None]:
import torch
import os
from diffusers import StableDiffusionPipeline
from diffusers import DiffusionPipeline
from diffusers import StableDiffusion3Pipeline
import shutil
import json
import mediapipe as mp
from torch import autocast
import random
import cv2
import string

# Report whether a CUDA device is usable. The device queries below raise on
# machines without CUDA, so they only run when a device is actually available.
cuda_available = torch.cuda.is_available()
print("Cuda verfügbar? -->", cuda_available)

if cuda_available:
    print(torch.cuda.get_device_name(0))

    # Free cached GPU memory and collect memory released via CUDA IPC.
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

## **🖼️ COCO Dataset** 
https://cocodataset.org/

- mehr als 200.000 reale Bilder aus unterschiedlichsten Szenarien
- unterteilt in 80 Kategorien (Personen, Fahrzeuge, Tiere, ...)
- jedes Bild detailliert annotiert, unter anderem mit Bildunterschriften (Captions)


In [None]:
# Folder paths
# Root folder that the extraction/generation cells write their output into.
results_path  = 'results/'
# Folder holding the COCO train2017 image files.
images_path = 'coco_dataset/train2017'
# JSON file that provides the image list (id -> file name).
annotations_path = 'coco_dataset/annotations/instances_train2017.json'
# JSON file that provides the captions per image id.
captions_path = 'coco_dataset/annotations/captions_train2017.json'

In [None]:
# Load the COCO annotations.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform default (e.g. cp1252 on Windows).
with open(annotations_path, 'r', encoding='utf-8') as f:
    coco_annotations = json.load(f)

# Load the COCO captions.
with open(captions_path, 'r', encoding='utf-8') as f:
    coco_captions = json.load(f)


# Build a mapping from image id to image file name.
mapping_filename = {
    image['id']: image['file_name'] for image in coco_annotations['images']
}

def get_filename_from_image_id(image_id):
    """Return the file name registered for *image_id* in ``mapping_filename``.

    Unknown ids do not raise; the placeholder text is returned instead.
    """
    try:
        return mapping_filename[image_id]
    except KeyError:
        return "Bild-ID nicht gefunden"

### **🚫 Nur Bilder mit passenden Captions herausfiltern**

In [None]:
def contains_word(sentence, word_list):
    """Return True if any entry of *word_list* occurs as a whole word in *sentence*.

    The sentence is split on whitespace and the comparison ignores case, so a
    word with punctuation attached (e.g. ``"man,"``) does not match ``"man"``.
    """
    tokens = set(sentence.lower().split())
    for candidate in word_list:
        if candidate.lower() in tokens:
            return True
    return False

def remove_punctuation(sentence):
    """Return *sentence* with every character of ``string.punctuation`` removed."""
    unwanted = frozenset(string.punctuation)
    return "".join(char for char in sentence if char not in unwanted)

# Caption filter: keep captions that mention a person-related word ...
words_of_interest = ['person', 'man', 'woman', 'men', 'women', 'kid', 'child', 'face', 'girl', 'boy']
# ... and drop captions that mention any of these words.
words_of_no_interest = ['ski', 'snow']

# Map image id -> (cleaned caption, file name) for every accepted caption.
# A later accepted caption for the same image id replaces the earlier one.
image_dict = {}
for annotation in coco_captions["annotations"]:
    cleaned_caption = remove_punctuation(annotation["caption"])
    image_id = annotation["image_id"]

    if not contains_word(cleaned_caption, words_of_interest):
        continue
    if contains_word(cleaned_caption, words_of_no_interest):
        continue

    image_dict[image_id] = (cleaned_caption, get_filename_from_image_id(image_id))

## **🙂 Gesichtserkennung**

In [None]:
def detectFace(img_name):
    """Detect faces in an image file and return them as cropped pixel arrays.

    Args:
        img_name: Path of the image file; it is read with ``cv2.imread``.

    Returns:
        A list with one cropped region of the image per detection, or ``None``
        when the file cannot be read as an image or no face is detected.
        A crop can be empty (``size == 0``), so callers should check it.
    """
    # cv2.imread signals an unreadable file by returning None rather than
    # raising; bail out here instead of crashing inside cvtColor.
    image = cv2.imread(img_name)
    if image is None:
        return None

    img_height, img_width = image.shape[:2]

    def get_rectangle(box):
        """Convert a relative bounding box into clamped pixel corners."""
        left = int(box.xmin * img_width)
        top = int(box.ymin * img_height)
        right = int(left + box.width * img_width)
        bottom = int(top + box.height * img_height)
        # Relative coordinates can fall outside the image. Negative values
        # would be interpreted as "count from the end" when slicing, which
        # yields a wrong crop, so clamp them to the image origin.
        return ((max(left, 0), max(top, 0)), (max(right, 0), max(bottom, 0)))

    mp_face_detection = mp.solutions.face_detection

    # NOTE(review): MediaPipe documents only 0 (short-range) and 1 (full-range)
    # for model_selection; the value 3 is kept unchanged from the original
    # code - confirm which model is intended.
    with mp_face_detection.FaceDetection(model_selection=3, min_detection_confidence=0.8) as face_detection:
        # OpenCV loads images as BGR, the detector is fed RGB.
        results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if not results.detections:
        return None

    faces = []
    for detection in results.detections:
        (left, top), (right, bottom) = get_rectangle(
            detection.location_data.relative_bounding_box
        )
        faces.append(image[top:bottom, left:right])

    return faces

In [None]:
def prune_detected_faces(folder_name):
    """Delete every file in *folder_name* for which ``detectFace`` finds no face.

    Returns:
        The number of files that were deleted.
    """
    removed = 0
    for entry in os.listdir(folder_name):
        candidate = os.path.join(folder_name, entry)
        if detectFace(candidate):
            continue
        os.remove(candidate)
        removed += 1
    return removed

## **✂️ Extrahieren von realen Gesichtern**

In [None]:
def find_faces(img_path, save_path, num_faces_wanted, face_res = 250):
    """Crop faces out of randomly chosen images listed in ``image_dict``.

    Args:
        img_path: Folder that contains the image files named in ``image_dict``.
        save_path: Output folder. It is deleted and recreated on every call.
        num_faces_wanted: Maximum number of face crops to write.
        face_res: Edge length in pixels of the saved square crops.

    Returns:
        The number of face crops written, at most ``num_faces_wanted``.
        Crops are saved as ``0.jpeg``, ``1.jpeg``, ... in *save_path*.
    """
    # Start from an empty output folder; makedirs also creates missing
    # parent folders (e.g. the results root on a fresh checkout).
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    # random.sample requires a sequence (dict views are rejected since
    # Python 3.11) and raises ValueError when asked for more items than the
    # population holds, so materialise the ids and cap the sample size.
    candidate_ids = list(image_dict)
    sample_size = max(0, min(len(candidate_ids), num_faces_wanted * 10))

    count = 0

    for img_id in random.sample(candidate_ids, sample_size):
        if count >= num_faces_wanted:
            break

        f_name = image_dict[img_id][1]

        faces = detectFace(os.path.join(img_path, f_name))
        if not faces:
            continue

        for face in faces:
            # An image with several faces must not push the total past the
            # requested amount.
            if count >= num_faces_wanted:
                break
            # Skip empty crops; cv2.resize cannot handle them.
            if not face.size:
                continue
            face = cv2.resize(face, (face_res, face_res))
            cv2.imwrite(os.path.join(save_path, f'{count}.jpeg'), face)
            count += 1

    return count

In [None]:
# Extraction settings: number of independent runs, face crops requested per
# run, and the edge length passed on as the crop resolution.
num_runs = 2
num_faces_wanted = 1000
face_res = 250

for n in range(num_runs):
    # Each run writes into its own folder below the results root.
    real_faces_dir = f'{results_path}/realFaces{n}'

    faces_generated = find_faces(images_path, real_faces_dir, num_faces_wanted, face_res)
    print(f'{faces_generated} Gesichter wurden im Ordner realFaces{n} generiert.')

    # Run the detector again on the saved crops and delete those without a face.
    x = prune_detected_faces(real_faces_dir)
    print(f'{x} Gesichter wurden entfernt.')

## **🔄 Generierung mit Stable Diffusion**

In [None]:
from transformers import AutoTokenizer
# Short name of the currently selected model; overwritten by the
# select_model_pipeline(...) call at the end of this cell.
model_name = ""
# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when unset). Credentials must not be committed in clear text; the
# token that used to be hard-coded here should be revoked.
access_token = os.environ.get("HF_TOKEN")
def select_model_pipeline(name):
    """Load the diffusion pipeline selected by *name* and move it to CUDA.

    Args:
        name: Model key, one of ``"sd14"``, ``"sdxl"`` or ``"sd11"``.

    Returns:
        Tuple ``(pipe, name)``: the loaded pipeline and the key it was
        selected with.

    Raises:
        ValueError: If *name* is not one of the supported model keys.
    """
    if name == "sd14":
        # Stable Diffusion v1.4 ("CompVis/stable-diffusion-v1-4"), half precision.
        pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16)
    elif name == "sdxl":
        # Stable Diffusion XL base 1.0 ("stabilityai/stable-diffusion-xl-base-1.0").
        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
    elif name == "sd11":
        # Stable Diffusion v1.1 ("CompVis/stable-diffusion-v1-1"), half precision.
        pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-1", variant="fp16", torch_dtype=torch.float16)
    else:
        # Without this branch an unknown key fell through to the return
        # statement and failed with an UnboundLocalError on `pipe`.
        raise ValueError(
            f"Unknown model name {name!r}; expected 'sd14', 'sdxl' or 'sd11'"
        )

    pipe.to("cuda")
    return pipe, name
    
# Load the pipeline used by the generation cells; model_name is reused there
# to label the output folders.
pipe, model_name = select_model_pipeline("sd11")

In [None]:
def find_faces_generated(save_path_images, save_path_faces, num_faces_wanted, face_res=250):
    """Generate images from COCO captions and crop the faces found in them.

    Captions are drawn at random from ``image_dict``; each one is turned into
    a prompt for the global ``pipe``. Every generated image is saved and then
    searched for faces with ``detectFace``.

    Args:
        save_path_images: Folder for the generated images. Existing folders
            are reused; files are numbered from 0, so earlier files with the
            same names are overwritten.
        save_path_faces: Folder for the face crops (same reuse behaviour).
        num_faces_wanted: Maximum number of face crops to write.
        face_res: Edge length in pixels of the saved square crops.

    Returns:
        Tuple ``(count_i, count_f)``: number of generated images and number
        of face crops written (at most ``num_faces_wanted``).
    """
    # makedirs also creates missing parent folders (e.g. the results root).
    os.makedirs(save_path_images, exist_ok=True)
    os.makedirs(save_path_faces, exist_ok=True)

    # The negative prompt is the same for every image; build it once.
    negative_prompt = "bad anatomy, low quality, ugly, cartoon, anime, bad limbs, bad face, deformed, blurry eyes, multiple fingers, distorted, unrealistic, poorly rendered, unnatural, messy, pixelated, glitch, out of proportion, extra limbs, artifact, strange eyes, poorly drawn, bad perspective, awkward pose, extra legs, low resolution, bad expression, odd lighting, wrong shadows, blurry background, disconnected body parts, unnatural colors"

    # random.sample requires a sequence (dict views are rejected since
    # Python 3.11) and raises ValueError when asked for more items than the
    # population holds, so materialise the ids and cap the sample size.
    candidate_ids = list(image_dict)
    sample_size = max(0, min(len(candidate_ids), num_faces_wanted * 10))

    count_f = 0
    count_i = 0

    for img_id in random.sample(candidate_ids, sample_size):
        if count_f >= num_faces_wanted:
            break

        caption = image_dict[img_id][0]
        prompt = caption.lower() + ", photography, colorized, face clarity"

        print(prompt)

        # Generate the image and save it before running face detection on
        # the saved file.
        image = pipe(prompt=prompt, negative_prompt=negative_prompt).images[0]
        image_file = f'{save_path_images}/{count_i}.jpeg'
        image.save(image_file)
        count_i += 1

        faces = detectFace(image_file)
        if not faces:
            continue

        for face in faces:
            # An image with several faces must not push the total past the
            # requested amount.
            if count_f >= num_faces_wanted:
                break
            # Skip empty crops; cv2.resize cannot handle them.
            if not face.size:
                continue
            face = cv2.resize(face, (face_res, face_res))
            cv2.imwrite(os.path.join(save_path_faces, f'{count_f}.jpeg'), face)
            count_f += 1

    return count_i, count_f

In [None]:
# Generation settings: number of runs, face crops requested per run, and the
# edge length passed on as the crop resolution.
num_runs = 1
# NOTE(review): not referenced by any other visible cell - confirm it is still needed.
processed_images = set()
num_faces_wanted = 900
face_res = 250

for n in range(num_runs):
    # Output folders are labelled with the selected model and the run index.
    images_dir = f'{results_path}/imagesGenerated_{model_name}_{n}'
    faces_dir = f'{results_path}/facesGenerated_{model_name}_{n}'

    images_generated, faces_generated = find_faces_generated(images_dir, faces_dir, num_faces_wanted, face_res)
    print(f'{images_generated} images were generated in folder {results_path}/imagesGenerated_{model_name}_{n}')
    print(f'{faces_generated} faces were detected! See them in folder {results_path}/facesGenerated_{model_name}_{n}')

    # Run the detector again on the saved crops and delete those without a face.
    x = prune_detected_faces(faces_dir)
    print(f'{x} faces were removed')

## **🔧 Troubleshooting**

In [None]:
# Clear the GPU cache
torch.cuda.empty_cache()

In [None]:
def rename_images_in_folder(folder_path, new_name):
    """Rename the files in *folder_path* to ``<new_name>_<index><extension>``.

    Only regular files are renamed, and files whose name starts with ``k`` or
    ``K`` are left untouched. The files are numbered from 0 in sorted name
    order, so the result does not depend on the order ``os.listdir`` returns.

    The renaming happens in two passes through temporary names. This keeps a
    file from being overwritten when its new name is still held by another
    file that is waiting to be renamed (e.g. when the function is run a second
    time on the same folder).

    Args:
        folder_path: Folder whose files are renamed in place.
        new_name: Prefix of the new file names.

    Raises:
        FileExistsError: If a new name is already taken by an entry that is
            not part of the renaming. Nothing is renamed in that case.
    """
    selected = sorted(
        filename
        for filename in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, filename))
        and not filename.lower().startswith('k')
    )

    # Work out every (old name, new name) pair before touching the disk.
    plan = []
    for count, filename in enumerate(selected):
        file_extension = os.path.splitext(filename)[1]
        plan.append((filename, f"{new_name}_{count}{file_extension}"))

    # A target may only exist already if it is one of the files being
    # renamed; anything else (skipped file, sub-folder) would be clobbered.
    source_names = {os.path.normcase(filename) for filename in selected}
    for _, new_file_name in plan:
        new_file_path = os.path.join(folder_path, new_file_name)
        if os.path.normcase(new_file_name) not in source_names and os.path.exists(new_file_path):
            raise FileExistsError(f"Rename target already exists: {new_file_path}")

    # Pass 1: move every file to a unique temporary name.
    staged = []
    for index, (filename, new_file_name) in enumerate(plan):
        staging_path = os.path.join(folder_path, f".rename_staging_{os.getpid()}_{index}")
        while os.path.exists(staging_path):
            staging_path += "_"
        os.rename(os.path.join(folder_path, filename), staging_path)
        staged.append((filename, staging_path, new_file_name))

    # Pass 2: move the files from the temporary names to the final names.
    for filename, staging_path, new_file_name in staged:
        print(filename)
        old_file_path = os.path.join(folder_path, filename)
        new_file_path = os.path.join(folder_path, new_file_name)
        os.rename(staging_path, new_file_path)
        print(f"Renamed: {old_file_path} to {new_file_path}")
# Example usage: rename the files of one generated-image folder to "val_<n>".
# The path is an absolute, machine-specific location; adjust it before running.
folder_path = "C:/Users/leonl/Seminararbeit/results/imagesGenerated_sdxl_0"
new_name = "val"
rename_images_in_folder(folder_path, new_name)

In [None]:
# Manual check of the face detector on a single image.
face_res = 250
faces = detectFace(os.path.join(f'C:/Users/leonl/Seminararbeit/results/valid__83.jpeg'))
print(faces)

# detectFace returns None when it finds no face; iterate over an empty list
# in that case instead of failing with a TypeError.
for face in faces or []:
    # Skip empty crops; cv2.resize cannot handle them.
    if not face.size:
        continue
    face = cv2.resize(face,(face_res,face_res))
    # Every face is written to the same file, so only the last one remains.
    cv2.imwrite(os.path.join(f'C:/Users/leonl/Seminararbeit/results/test.jpeg'), face)

In [None]:
import os
import cv2

def resize_images_in_folder(folder_path, output_folder, size=(100, 100)):
    """Write a resized copy of every readable image in *folder_path*.

    Args:
        folder_path: Folder whose entries are read with ``cv2.imread``.
        output_folder: Destination folder; created when it does not exist.
            Copies keep the file name of the source image.
        size: Target size handed to ``cv2.resize``.

    Entries that cannot be read as an image are reported and skipped. Any
    exception raised while handling an entry is printed and the loop goes on
    with the next entry.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for filename in os.listdir(folder_path):
        img_path = os.path.join(folder_path, filename)

        try:
            img = cv2.imread(img_path)

            # imread returns None for anything it cannot decode.
            if img is None:
                print(f"Skipping file {filename}, as it is not a valid image")
                continue

            resized_img = cv2.resize(img, size)
            cv2.imwrite(os.path.join(output_folder, filename), resized_img)
            print(f"Resized and saved {filename} to {output_folder}")
        except Exception as e:
            print(f"Error processing file {filename}: {e}")

# Example usage: write resized copies (default size 100x100) of the images in
# realFaces1 into a separate folder. The paths are absolute, machine-specific
# locations; adjust them before running.
input_folder = 'C:/Users/leonl/Seminararbeit/results/realFaces1'
output_folder = 'C:/Users/leonl/Seminararbeit/results/realFaces1SMALL'
resize_images_in_folder(input_folder, output_folder)