In [9]:
# ================================
# STEP 1: Install dependencies
# ================================
!pip install validators

# ================================
# STEP 2: Import libraries
# ================================
import pandas as pd
import validators
from google.colab import files

# ================================
# STEP 3: Upload CSV file
# ================================
print("📤 Please upload your leads file (CSV)...")
uploaded = files.upload()

# The upload result is keyed by filename. If nothing came back (for example
# the dialog was dismissed), stop with a clear message rather than letting
# the filename lookup below fail.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")

# Automatically detect filename (the first key of the upload result)
filename = next(iter(uploaded))
print("✅ Uploaded file:", filename)

# Read CSV into DataFrame and show the first rows as a sanity check
df = pd.read_csv(filename)
print("\n📊 Preview of uploaded leads file:")
print(df.head())

# ================================
# STEP 4: Data Cleaning Function
# ================================
def clean_leads(df):
    """Return a cleaned copy of a leads DataFrame.

    The input frame is left untouched. On the copy, column names are
    trimmed, lower-cased and have spaces replaced with underscores; then
    duplicate rows are removed, rows with no values at all are dropped,
    and any remaining missing values become empty strings.

    Args:
        df: DataFrame of leads as loaded from the CSV.

    Returns:
        A new DataFrame with normalised column names and cleaned rows.
    """
    cleaned = df.copy()

    # Normalise headers step by step: trim -> lowercase -> snake-style.
    headers = cleaned.columns.str.strip()
    headers = headers.str.lower()
    cleaned.columns = headers.str.replace(" ", "_")

    # Order matters: de-duplicate, drop fully empty rows, then blank out
    # whatever gaps remain (basic handling, can improve later).
    return cleaned.drop_duplicates().dropna(how="all").fillna("")

# Clean data: rebind df to the cleaned copy returned by clean_leads
df = clean_leads(df)

# ================================
# STEP 5: Validate Emails & URLs
# ================================
def validate_email(email):
    """Run ``validators.email`` on ``email`` and hand back its result.

    The result is returned as-is; it is not converted to ``bool`` here.
    """
    verdict = validators.email(email)
    return verdict

def validate_url(url):
    """Run ``validators.url`` on ``url`` and hand back its result.

    The result is returned as-is; it is not converted to ``bool`` here.
    """
    verdict = validators.url(url)
    return verdict

# Flag each row's email / website as valid (True/False). When the source
# column is absent, the flag column is still created but filled with None.
df["valid_email"] = (
    df["email"].apply(lambda addr: bool(validate_email(addr)))
    if "email" in df.columns
    else None
)

df["valid_website"] = (
    df["website"].apply(lambda link: bool(validate_url(link)))
    if "website" in df.columns
    else None
)

# ================================
# STEP 6: Save Cleaned File
# ================================
output_file = "cleaned_leads.csv"
# index=False keeps the DataFrame's row index out of the written CSV
df.to_csv(output_file, index=False)

print("\n✅ Cleaning & validation complete!")
print(f"📂 Cleaned file saved as: {output_file}")

# ================================
# STEP 7: Download the Clean File
# ================================
# Hand the file just written to the Colab download helper
files.download(output_file)


📤 Please upload your leads file (CSV)...


Saving Leads.csv - Sheet1.csv to Leads.csv - Sheet1 (5).csv
✅ Uploaded file: Leads.csv - Sheet1 (5).csv

📊 Preview of uploaded leads file:
        company_name contact_name            email  company_size industry
0         Alpha Tech     John Doe   john@alpha.com          1200     Tech
1     Beta Solutions   Jane Smith        jane@beta           450  Finance
2         Gamma Corp      Bob Lee    bob@gamma.com           800     Tech
3  Delta Enterprises   Alice Wong  alice@delta.com           200   Retail

✅ Cleaning & validation complete!
📂 Cleaned file saved as: cleaned_leads.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>