In [1]:
import pandas as pd
import os
import re

# Folder scanned for input .csv files by the processing loop below.
input_dir = "./LinkedIn Data Public"
# Folder that receives the cleaned, renamed CSV files.
output_dir = "./CleanedLinkedInData"

# Create the output folder up front; exist_ok=True keeps re-runs from failing
# when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

def clean_text(text):
    """Return *text* as a sanitized, whitespace-normalized string.

    Missing values (anything ``pd.isna`` reports as null) become ``""``.
    Otherwise the value is converted with ``str()``, every character that is
    not a word character, whitespace, comma, period or hyphen is removed, and
    runs of whitespace are collapsed to single spaces with no leading or
    trailing space.
    """
    if pd.isna(text):
        return ""
    kept = re.sub(r'[^\w\s,.-]', '', str(text))
    # split() with no argument discards leading/trailing whitespace and
    # collapses internal runs, so the joined result needs no further strip.
    return ' '.join(kept.split())

def safe_read_csv(filepath):
    """Read a CSV into a DataFrame, retrying with ISO-8859-1 if UTF-8 fails.

    The file is first parsed as UTF-8. If that raises ``UnicodeDecodeError``
    a notice is printed and the file is parsed again as ISO-8859-1. Any other
    exception from ``pd.read_csv`` propagates to the caller.
    """
    try:
        frame = pd.read_csv(filepath, encoding='utf-8')
    except UnicodeDecodeError:
        print(f"UTF-8 failed, trying ISO-8859-1 for: {filepath}")
        frame = pd.read_csv(filepath, encoding='ISO-8859-1')
    return frame

def standardize_name(name):
    """Build a tidy ``"<Words>.csv"`` file name from the tail of *name*.

    The extension is dropped and only the text after the last ``-`` is kept
    (the whole stem when there is no hyphen). Every character that is not an
    ASCII letter or whitespace is replaced by a space, and each remaining
    word is capitalized and joined with single spaces. If no letters remain
    the result is ``".csv"``.
    """
    stem, _ext = os.path.splitext(name)
    # rsplit with maxsplit=1 isolates the segment after the final hyphen.
    tail = stem.rsplit('-', 1)[-1]
    letters_only = re.sub(r'[^A-Za-z\s]', ' ', tail)
    title = ' '.join(map(str.capitalize, letters_only.split()))
    return f"{title}.csv"
# Clean every .csv found directly in input_dir and write a three-column copy
# (First Name, Last Name, Company) to output_dir under a standardized name.
for file in os.listdir(input_dir):
    if not file.endswith('.csv'):
        continue

    input_path = os.path.join(input_dir, file)
    try:
        df = safe_read_csv(input_path)
        # Header cells may carry stray surrounding whitespace; normalize them
        # before checking for the required columns.
        df.columns = [header.strip() for header in df.columns]

        required_cols = ['First Name', 'Last Name', 'Company']
        if any(needed not in df.columns for needed in required_cols):
            print(f"Skipping {file} - missing required columns")
            continue

        # Sanitize each required column in place, in the listed order.
        for column in required_cols:
            df[column] = df[column].apply(clean_text)

        cleaned_df = df[required_cols]
        new_name = standardize_name(file)
        output_path = os.path.join(output_dir, new_name)

        cleaned_df.to_csv(output_path, index=False)
        print(f" Saved cleaned file as: {new_name}")

    except Exception as exc:
        # Best-effort batch: report the failing file and move on to the next.
        print(f" Error processing {file}: {exc!s}")

 Saved cleaned file as: Aaditya Raj.csv
 Saved cleaned file as: Abhishek Singh.csv
 Saved cleaned file as: Aditya Singh.csv
 Saved cleaned file as: Afzl Raza.csv
 Saved cleaned file as: Ajay Jatav.csv
 Saved cleaned file as: Ajit Yadav.csv
 Saved cleaned file as: Akanksha.csv
 Saved cleaned file as: Alok Raj.csv
 Saved cleaned file as: Aman Adarsh.csv
 Saved cleaned file as: Aman Singh.csv
 Saved cleaned file as: Amit Kumar.csv
 Saved cleaned file as: Anamika Kumari.csv
 Saved cleaned file as: Anand Pandey.csv
 Saved cleaned file as: Anoop Kumar.csv
 Saved cleaned file as: Anshu Kumar.csv
 Saved cleaned file as: Anuradha Tiwari.csv
 Saved cleaned file as: Anushri Mishra.csv
 Saved cleaned file as: Aradhya Patel.csv
 Saved cleaned file as: Arjun Kadam.csv
 Saved cleaned file as: Arpita Tripathi.csv
 Saved cleaned file as: Arun Kumar.csv
 Saved cleaned file as: Aryan Saini.csv
 Saved cleaned file as: Ashwin Yadav.csv
 Saved cleaned file as: Ayush Kumar.csv
 Saved cleaned file as: Ayush Y