# 🧹 Clean ISB Translated Data (No Replacements Needed)
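This notebook reformats the `contact` column of the translated ISB funding CSV: it inserts a missing `|` separator after "Advice on business development" where needed and renders each contact entry on its own line. No other text replacements are applied, since none are needed.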

In [1]:

import csv
import pandas as pd
import ast


In [None]:

# --- File paths ---
# Input (raw translated CSV) and output (cleaned CSV). Both are relative paths,
# so they resolve against the notebook's current working directory.
uncleaned_isb_english, cleaned_isb_english = (
    'funding-isb-english.csv',
    'funding-isb-english-cleaned.csv',
)


In [3]:

# --- Clean contact formatting if separator is missing ---
def clean_contact_info(contact_info):
    """Ensure a "|" separator follows the first "Advice on business development".

    Args:
        contact_info: Raw contact cell text. Non-string values (e.g. None for a
            missing CSV field) are returned unchanged.

    Returns:
        The text with " |" inserted directly after the first occurrence of the
        phrase when the next non-whitespace character is not already "|";
        otherwise the input unchanged.
    """
    marker = "Advice on business development"
    if not isinstance(contact_info, str):
        return contact_info

    before, found, after = contact_info.partition(marker)
    if not found:
        return contact_info

    # Skip whitespace before looking for the separator: text that already reads
    # "... development | ..." must not get a second pipe, which would also make
    # the function non-idempotent.
    if after.lstrip().startswith("|"):
        return contact_info

    return f"{before}{marker} |{after}"


In [4]:

# --- Format list-style contact info into readable lines ---
def format_contact_info(contact_column):
    """Render a list of "name | phone | email" strings as readable lines.

    Args:
        contact_column: Either a string holding a Python list literal
            (e.g. "['Jane | 030 1 | jane@x.org']") or an already-parsed
            list/tuple of such strings.

    Returns:
        One "- Name: ... | Phone: ... | Email: ..." line per entry that splits
        into exactly three "|"-separated parts, joined with newlines;
        "No contact information available." when no entry qualifies; or the
        input unchanged when it cannot be interpreted as a list/tuple.
    """
    if isinstance(contact_column, str):
        try:
            contacts = ast.literal_eval(contact_column)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Only the errors literal_eval raises for malformed input are
            # treated as "not a list literal"; anything else should surface.
            return contact_column
    else:
        contacts = contact_column

    # A literal such as "42" or "'text'" parses fine but is not a contact list.
    if not isinstance(contacts, (list, tuple)):
        return contact_column

    formatted_contacts = []
    for contact in contacts:
        if not isinstance(contact, str):
            continue
        parts = [p.strip() for p in contact.split("|")]
        # Entries that do not have exactly name/phone/email are skipped.
        if len(parts) == 3:
            name, phone, email = parts
            formatted_contacts.append(f"- Name: {name} | Phone: {phone} | Email: {email}")

    if not formatted_contacts:
        return "No contact information available."
    return "\n".join(formatted_contacts)


In [5]:

# --- Load and clean rows (only contact formatting applied) ---
# newline='' hands line-ending handling to the csv module, so quoted fields
# that contain line breaks are read back intact.
with open(uncleaned_isb_english, mode='r', encoding='utf-8', newline='') as infile:
    reader = csv.DictReader(infile)
    data = list(reader)

cleaned_data = []
for row in data:
    cleaned_row = dict(row)  # copy, so the raw rows in `data` stay untouched
    contact = cleaned_row.get('contact')
    # DictReader fills fields missing from a short row with None; only real
    # strings are passed through the contact helpers.
    if isinstance(contact, str):
        cleaned_row['contact'] = format_contact_info(clean_contact_info(contact))
    cleaned_data.append(cleaned_row)


FileNotFoundError: [Errno 2] No such file or directory: 'data/funding-isb-english.csv'

In [None]:

# --- Save cleaned output ---
# Take the column order from the first loaded row; if the input had a header
# but no data rows, fall back to the header captured by the DictReader so the
# output is still written (header only) instead of failing on data[0].
fieldnames = list(data[0].keys()) if data else list(reader.fieldnames or [])

with open(cleaned_isb_english, mode='w', encoding='utf-8', newline='') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(cleaned_data)

print(f"✅ Cleaned file saved to: {cleaned_isb_english}")
