In [None]:
# wichtigen Bibliotheken
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

#Dataset laden usw.
DATASET_DIR = r"D:\Users\roscha\Desktop\ML4B\complete"
df_artwork = pd.read_csv(f"{DATASET_DIR}/artwork_dataset.csv")
df_info = pd.read_csv(f"{DATASET_DIR}/info_dataset.csv")

# Prüfen wir ein paar Infos zum DataFrame
print("Anzahl Zeilen:", len(df_artwork))
print("Spalten:", df_artwork.columns.tolist())

# Vorschau auf die Daten
print("Spalten im Datensatz:")
print(df_artwork.columns.tolist())
df_artwork.head()
print("Spalten im info-Datensatz:")
print(df_info.columns.tolist())
df_info.head()

In [None]:
# Mergen der beiden DataFrames über 'artist' UND 'title'
df_merged = pd.merge(
    df_artwork,
    df_info,
    on=['artist'],
    how='inner'
)

# Ergebnis prüfen
print(" Spalten im zusammengeführten DataFrame:")
print(df_merged.columns.tolist())
df_merged.head()

🧾 Spalten im zusammengeführten DataFrame:
['ID', 'artist', 'title', 'picture data', 'file info', 'jpg url', 'born-died', 'period', 'school', 'url', 'base', 'nationality']


Unnamed: 0,ID,artist,title,picture data,file info,jpg url,born-died,period,school,url,base,nationality
0,0,"AACHEN, Hans von",venus and adonis,"1574-88, oil on canvas, 68 x 95 cm, fogg art m...","1700*1211, true color, 252 kb",https://www.wga.hu/art/a/aachen/adonis.jpg,(1552-1615),Mannerism,painter,https://www.wga.hu/html/a/aachen/index.html,Germany,German
1,1,"AACHEN, Hans von",procuring scene,"1605-10, oil on wood, 114 x 130 cm, kunsthisto...","1370*1168, true color, 212 kb",https://www.wga.hu/art/a/aachen/z_scene.jpg,(1552-1615),Mannerism,painter,https://www.wga.hu/html/a/aachen/index.html,Germany,German
2,2,"AACHEN, Hans von",self-portrait with a glass of wine,"c. 1596, oil on canvas, 53 x 44 cm, private co...","896*1074, true color, 57 kb",https://www.wga.hu/art/a/aachen/selfport.jpg,(1552-1615),Mannerism,painter,https://www.wga.hu/html/a/aachen/index.html,Germany,German
3,3,"AACHEN, Hans von",two laughing men (self-portrait),"before 1574, oil on panel, 48 x 39 cm, archdio...","1173*1500, true color, 210 kb",https://www.wga.hu/art/a/aachen/selfpor1.jpg,(1552-1615),Mannerism,painter,https://www.wga.hu/html/a/aachen/index.html,Germany,German
4,4,"AACHEN, Hans von",portrait of emperor rudolf ii,"1606-08, oil on canvas, 60 x 48 cm, kunsthisto...","1149*1500, true color, 247 kb",https://www.wga.hu/art/a/aachen/rudolf2.jpg,(1552-1615),Mannerism,painter,https://www.wga.hu/html/a/aachen/index.html,Germany,German


In [None]:
# Nur relevante Spalten auswählen
df_clean = df_merged[['jpg url', 'period']].dropna()

# Übersicht: Welche Stilrichtungen ('period') gibt es?
print(df_clean['period'].value_counts())

period
Baroque                 14203
Early Renaissance        6011
Mannerism                4032
Northern Renaissance     3862
Medieval                 3490
High Renaissance         3298
Rococo                   2510
Impressionism            2404
Romanticism              1975
Neoclassicism            1527
Realism                  1222
Art Nouveau              1066
Name: count, dtype: int64


In [None]:
selected_styles = ['Baroque', 'Impressionism', 'Realism'] 
max_images_per_style = 500

# Auswahl + Limit
df_filtered = df_clean[df_clean['period'].isin(selected_styles)]
df_prepared = df_filtered.groupby('period').apply(
    lambda x: x.sample(n=min(len(x), max_images_per_style), random_state=42)
).reset_index(drop=True)


  df_prepared = df_filtered.groupby('period').apply(


In [None]:
#Download der Bilder

import requests
import os
from tqdm import tqdm

OUTPUT_DIR = "data_prepared"
os.makedirs(OUTPUT_DIR, exist_ok=True)

for idx, row in tqdm(df_prepared.iterrows(), total=len(df_prepared), desc="Lade Bilder herunter"):
    url = row['jpg url']
    style = row['period']
    filename = f"{style}_{idx}.jpg"

    style_folder = os.path.join(OUTPUT_DIR, style)
    os.makedirs(style_folder, exist_ok=True)
    filepath = os.path.join(style_folder, filename)

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        with open(filepath, 'wb') as f:
            f.write(response.content)
    except Exception as e:
        print(f" Fehler bei Bild {url}: {e}")


📥 Lade Bilder herunter: 100%|█████████████████████████████████████████████████████| 1500/1500 [05:47<00:00,  4.32it/s]


In [1]:
# Prepariere die Bilder: Alle Bilder sollten z. B. 256×256 oder 512×512 sein (je nach Modellanforderung)

from PIL import Image
import os

# Teste ein paar Bilder
folder = "data_prepared/Baroque"
files = os.listdir(folder)
img = Image.open(os.path.join(folder, files[0]))
print(img.size)

(813, 1035)


In [None]:
#Bilder einheitlich anpassen
from PIL import Image
import os

def resize_all_images(root_folder, target_size=(256, 256)):
    for style in os.listdir(root_folder):
        style_path = os.path.join(root_folder, style)
        if not os.path.isdir(style_path):
            continue
        for file in os.listdir(style_path):
            file_path = os.path.join(style_path, file)
            try:
                img = Image.open(file_path).convert("RGB").resize(target_size)
                img.save(file_path)
            except Exception as e:
                print(f" Fehler bei {file_path}: {e}")

resize_all_images("data_prepared", target_size=(256, 256))
print(" Alle Bilder auf 256×256 skaliert.")


✅ Alle Bilder auf 256×256 skaliert.


In [None]:
# Teilt die zwei Stilrichtungen in Trainingsdatensätze um A = Barock und B = Realismus

import shutil, os

os.makedirs("data/trainA", exist_ok=True)
os.makedirs("data/trainB", exist_ok=True)

shutil.copytree("data_prepared/Baroque", "data/trainA", dirs_exist_ok=True)
shutil.copytree("data_prepared/Realism", "data/trainB", dirs_exist_ok=True)


'data/trainB'

In [1]:
from PIL import Image
import torch
print("Pillow und PyTorch sind installiert:", torch.__version__)


Pillow und PyTorch sind installiert: 2.5.1
