# Captioning

In [None]:
# Install deep-translator, which provides the GoogleTranslator imported below.
!pip install deep-translator

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every data path used in this notebook
# is located below that mount point.
drive.mount('/content/drive')

import zipfile
import json
import os
import pandas as pd
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from deep_translator import GoogleTranslator
import time
import re

In [None]:
# Load the BLIP base captioning checkpoint once; generate_caption() reuses
# both the processor and the model for every image.
blip_checkpoint = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(blip_checkpoint)
model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)

def generate_caption(image_path):
    """Generate a caption for a single image with the module-level BLIP model.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string (special tokens removed).
    """
    # Open the file inside a context manager so the underlying file handle is
    # released right away; convert() returns an independent in-memory image,
    # so it stays usable after the source file has been closed.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")
    output_ids = model.generate(**inputs)
    # generate() returns one sequence per input image; only one image is passed.
    return processor.decode(output_ids[0], skip_special_tokens=True)

def translate_caption(caption, target_lang='de'):
    """Translate an English caption into ``target_lang``.

    Args:
        caption: Caption text; the source language is fixed to English.
        target_lang: Language code of the translation target (default German).

    Returns:
        Whatever ``GoogleTranslator.translate`` returns for the caption.
    """
    return GoogleTranslator(source='en', target=target_lang).translate(caption)

# Images are read from main_dir; batch CSVs and the progress file are written
# to output_dir. Both currently point at the same Drive folder.
main_dir = '/content/drive/MyDrive/Projekt_Mobilisierung/Data/LINKE'
output_dir = '/content/drive/MyDrive/Projekt_Mobilisierung/Data/LINKE'
# Text file with one image path per line for every image already captioned.
progress_file = os.path.join(output_dir, 'processed_images.txt')

image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

# Sorted so that the processing order (and with it the timing sample and the
# batch contents) does not depend on the directory order os.walk happens to
# return. A name ending in an image extension can never end in '.zip', so no
# additional archive check is needed.
image_paths = sorted(
    os.path.join(root, file)
    for root, _, files in os.walk(main_dir)
    for file in files
    if file.lower().endswith(image_extensions)
)

# Resume support: skip everything recorded by a previous run.
if os.path.exists(progress_file):
    with open(progress_file, 'r') as f:
        processed_images = set(f.read().splitlines())
else:
    processed_images = set()

image_paths_to_process = [path for path in image_paths if path not in processed_images]

# Time the complete caption + translation pipeline on a small sample to
# estimate how long the full run will take.
# NOTE: the sample results are only used for timing; the main loop below
# processes these images again.
sample_size = 10
sample_paths = image_paths_to_process[:sample_size]

start_time = time.time()

sample_data = [
    {
        'image_path': path,
        'caption': translate_caption(generate_caption(path))
    }
    for path in sample_paths
]

end_time = time.time()

total_images = len(image_paths_to_process)

# Divide by the number of images actually processed: fewer than sample_size
# images may be left, and dividing by the constant would understate the
# per-image time. With nothing left to process the estimate is simply zero.
if sample_paths:
    avg_time_per_image = (end_time - start_time) / len(sample_paths)
else:
    avg_time_per_image = 0.0
estimated_total_time = total_images * avg_time_per_image

print(f"Durchschnittliche Zeit pro Bild: {avg_time_per_image:.2f} Sekunden")
print(f"Geschätzte Gesamtzeit für alle Bilder: {estimated_total_time:.2f} Sekunden ({estimated_total_time / 60:.2f} Minuten)")

# Continue the batch numbering after the highest batch file already present in
# output_dir. The dot is escaped and the pattern anchored at the end so that
# only names really ending in "batch_<n>.csv" are counted.
batch_file_pattern = re.compile(r'batch_(\d+)\.csv$')

existing_batches = [
    int(found.group(1))
    for found in map(batch_file_pattern.search, os.listdir(output_dir))
    if found
]

# Numbering starts at 1 when no batch file exists yet.
next_batch_number = max(existing_batches, default=0) + 1

batch_size = 50
data = []


def _save_batch(records, batch_number):
    """Write one batch of caption records to CSV, then mark its images as processed."""
    batch_path = os.path.join(output_dir, f'all_images_with_captions_batch_{batch_number}.csv')
    pd.DataFrame(records).to_csv(batch_path, index=False)

    # The progress file is appended only after the CSV was written, so a path
    # listed there always has a saved caption.
    with open(progress_file, 'a') as f:
        f.writelines(f"{item['image_path']}\n" for item in records)


try:
    for image_path in image_paths_to_process:
        caption = generate_caption(image_path)
        translated_caption = translate_caption(caption)
        data.append({'image_path': image_path, 'caption': translated_caption})

        if len(data) >= batch_size:
            _save_batch(data, next_batch_number)
            data = []
            next_batch_number += 1
finally:
    # Runs after the last image AND when captioning/translation raises or the
    # cell is interrupted, so an incomplete batch of finished captions is
    # never thrown away. Any exception still propagates afterwards.
    if data:
        _save_batch(data, next_batch_number)
        data = []
        next_batch_number += 1


In [None]:
input_dir = '/content/drive/MyDrive/Projekt_Mobilisierung/Data/LINKE'
output_file = os.path.join(input_dir, 'all_images_with_captions_combined.csv')
# Same path as output_file: a combined file written by an earlier run is read
# back in so that its rows are kept when the file is rewritten below.
additional_file = os.path.join(input_dir, 'all_images_with_captions_combined.csv')

# Sorted so the row order of the combined file does not depend on the
# directory listing order.
batch_filenames = sorted(
    filename
    for filename in os.listdir(input_dir)
    if filename.startswith('all_images_with_captions_batch_') and filename.endswith('.csv')
)

dfs = [pd.read_csv(os.path.join(input_dir, filename)) for filename in batch_filenames]

if os.path.exists(additional_file):
    dfs.append(pd.read_csv(additional_file))

# pd.concat raises an unspecific "No objects to concatenate" ValueError on an
# empty list; fail with a message that names the folder instead.
if not dfs:
    raise ValueError(f"Keine CSV-Dateien zum Zusammenführen in {input_dir} gefunden.")

# Rows that are identical in every column (e.g. batch rows that are already
# part of the previous combined file) are kept only once.
combined_df = pd.concat(dfs, ignore_index=True).drop_duplicates()

combined_df.to_csv(output_file, index=False)

print(f"Alle Dateien wurden zusammengeführt und in {output_file} gespeichert.")

In [None]:
# Consistency check: every path listed in the progress file must have a row in
# the combined CSV. Paths without a row are removed from the progress file.
csv_file = '/content/drive/MyDrive/Projekt_Mobilisierung/Data/LINKE/all_images_with_captions_combined.csv'
txt_file = '/content/drive/MyDrive/Projekt_Mobilisierung/Data/LINKE/processed_images.txt'

df = pd.read_csv(csv_file)
csv_image_paths = set(df['image_path'])

with open(txt_file, 'r') as f:
    txt_image_paths = set(f.read().splitlines())

# Paths marked as processed that have no caption row.
missing_paths = txt_image_paths.difference(csv_image_paths)

if not missing_paths:
    print("Alle Bildpfade aus der TXT-Datei sind in der CSV-Datei vorhanden.")
else:
    print(f"Die folgenden {len(missing_paths)} Bildpfade aus der TXT-Datei sind nicht in der CSV-Datei vorhanden:")
    for path in missing_paths:
        print(path)

    # Rewrite the progress file with the remaining paths in sorted order.
    txt_image_paths = txt_image_paths.difference(missing_paths)
    with open(txt_file, 'w') as f:
        f.writelines(f"{path}\n" for path in sorted(txt_image_paths))
    print(f"{len(missing_paths)} Pfade wurden aus der TXT-Datei entfernt.")

Alle Bildpfade aus der TXT-Datei sind in der CSV-Datei vorhanden.


In [None]:
# Gather the per-folder combined caption files found anywhere below root_dir
# and stack them into one final CSV stored directly in root_dir.
root_dir = '/content/drive/MyDrive/Projekt_Mobilisierung/Data'
output_file = os.path.join(root_dir, 'all_images_with_captions_combined_final.csv')

combined_name = 'all_images_with_captions_combined.csv'

# A directory contributes at most one file of that exact name.
dfs = [
    pd.read_csv(os.path.join(subdir, combined_name))
    for subdir, _, files in os.walk(root_dir)
    if combined_name in files
]

combined_df = pd.concat(dfs, ignore_index=True)

combined_df.to_csv(output_file, index=False)

print(f"Alle Dateien wurden zusammengeführt und in {output_file} gespeichert.")

Alle Dateien wurden zusammengeführt und in /content/drive/MyDrive/Projekt_Mobilisierung/Data/all_images_with_captions_combined_final.csv gespeichert.


## Anfügen der Captions an den Trainingsdatensatz

In [None]:
# Attach the generated captions to the annotated sample: the image file name
# without its extension is matched against the sample's "id" column.
root_dir = '/content/drive/MyDrive/Projekt_Mobilisierung/Data'
all_images_file = os.path.join(root_dir, 'all_images_with_captions_combined_final.csv')
all_accs_file = os.path.join(root_dir, 'anno_sample_strat_df.csv')
output_file = os.path.join(root_dir, 'strat_caption.csv')

all_images_df = pd.read_csv(all_images_file)
all_accs_df = pd.read_csv(all_accs_file)


def _image_id_from_path(image_path):
    """Return the file name of image_path without directory and extension."""
    file_name = os.path.basename(image_path)
    stem, _extension = os.path.splitext(file_name)
    return stem


all_images_df['image_id'] = all_images_df['image_path'].apply(_image_id_from_path)

# Inner join: only images that have a matching row in the sample are kept.
combined_df = all_images_df.merge(all_accs_df, left_on='image_id', right_on='id', how='inner')

combined_df.to_csv(output_file, index=False)

print(f"Die Dateien wurden zusammengeführt und in {output_file} gespeichert.")

Die Dateien wurden zusammengeführt und in /content/drive/MyDrive/Projekt_Mobilisierung/Data/strat_caption.csv gespeichert.


Quellen:

Achmann-Denkler, M. (2024). michaelachmann/social-media-lab: DOI Release (v0.0.12). Zenodo. https://doi.org/10.5281/zenodo.10618621

Achmann-Denkler, M. (2024). “Visual Exploration.” January 15, 2024. https://doi.org/10.5281/zenodo.10039756.