In [11]:
import csv

In [16]:
# Input (raw export) and output (cleaned copy) CSV locations, relative to the
# notebook's working directory.
uncleaned_file, cleaned_file = (
    "data/funding-foerderdatenbank-english.csv",
    "data/funding-foerderdatenbank-cleaned.csv",
)

In [17]:
# Upper-case only the leading character; the rest of the text is left as is
def capitalize_first(text):
    """Return ``text`` with its first character upper-cased.

    Unlike ``str.capitalize``, the characters after the first are not
    lower-cased. Falsy input (for example ``""`` or ``None``) is returned
    unchanged.
    """
    if text:
        return text[0].upper() + text[1:]
    return text

In [18]:
# Helper to clean and format contact column
def clean_contact(contact):
    """Normalise a ``"key: value, key: value"`` contact string.

    The input is split on commas. A fragment containing a colon starts a new
    field: the text before the first colon is the key (stripped and passed
    through ``str.capitalize``), the text after it is the value (stripped).
    A fragment without a colon is treated as the continuation of the field
    before it, so values that themselves contain commas (for example a
    street plus postal code) are kept whole instead of being cut off at the
    first comma.

    Only the fields ``Name``, ``Email``, ``Phone`` and ``Address`` are kept,
    and they are emitted in that order; any other key (and its continuation
    fragments) is dropped. If a key occurs more than once, the last
    occurrence wins.

    Args:
        contact: Raw contact cell value.

    Returns:
        The rebuilt ``"Key: value, Key: value"`` string. Falsy or non-string
        input is returned unchanged. A string with none of the recognised
        fields yields ``""``.
    """
    if not contact or not isinstance(contact, str):
        return contact

    contact_dict = {}
    current_key = None  # field that colon-less fragments are appended to
    for part in contact.split(','):
        part = part.strip()
        if not part:
            continue
        if ':' in part:
            # Split on the first colon only, so values such as
            # "Tel: +49 ..." keep their own colons.
            key, value = part.split(':', 1)
            current_key = key.strip().capitalize()
            contact_dict[current_key] = value.strip()
        elif current_key is not None:
            # No colon: this is the tail of a value that contained a comma.
            existing = contact_dict[current_key]
            contact_dict[current_key] = f"{existing}, {part}" if existing else part

    # Rebuild in standard order
    keys = ['Name', 'Email', 'Phone', 'Address']
    return ', '.join(f"{k}: {contact_dict[k]}" for k in keys if k in contact_dict)

In [19]:
# Read, clean, and write
# newline='' hands newline handling to the csv module, so line breaks embedded
# in quoted fields are read back correctly (the csv docs ask for this on any
# file object passed to a reader or writer).
with open(uncleaned_file, mode='r', newline='', encoding='utf-8') as infile:
    reader = csv.DictReader(infile)
    rows = list(reader)
    # Column order taken from the input header; reused for the output file.
    fieldnames = reader.fieldnames

cleaned_rows = []
for row in rows:
    # Work on a copy so the parsed input rows in `rows` stay unmodified.
    cleaned_row = row.copy()
    # Capitalize first letter of location (skipped when missing or empty)
    if 'location' in row and row['location']:
        cleaned_row['location'] = capitalize_first(row['location'].strip())
    # Clean contact column
    if 'contact' in row:
        cleaned_row['contact'] = clean_contact(row['contact'])
    cleaned_rows.append(cleaned_row)

with open(cleaned_file, mode='w', newline='', encoding='utf-8') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(cleaned_rows)

print(f"✅ Cleaned file saved to: {cleaned_file}")
# Optionally, print a few cleaned rows for inspection
for row in cleaned_rows[:3]:
    print(row)


✅ Cleaned file saved to: data/funding-foerderdatenbank-cleaned.csv
{'name': 'Funding as part of the "Art & AI" program', 'description': 'Funding guidelines of the Federal Cultural Foundation Valid from 12.07.2024', 'domain': 'Culture, Media & Sport', 'eligibility': 'Association/Union', 'location': 'Nationwide', 'contact': 'Name: Kulturstiftung des Bundes (KSB), Email: info@kulturstiftung-bund.de, Phone: Tel: +49 345 2997-0, Address: 06110 Halle an der Saale', 'url': 'https://www.foerderdatenbank.de/FDB/Content/DE/Foerderprogramm/Bund/KultStiftBund/kunst-und-ki.html'}
{'name': 'Funding for projects on the topic of "Application of artificial intelligence (AI) in drug discovery"', 'description': 'Guideline for the funding of projects on the topic of "Application of artificial intelligence (AI) in drug discovery" dated: 19.02.2025 Federal Ministry of Education and Research BAnz AT 03.03.2025 B5', 'domain': 'Research & Innovation (topic-specific), Health & Social Affairs', 'eligibility': 'U