In [1]:
import pandas as pd
import re

# --- SETTINGS ---
infile, outfile = "input.csv", "output.csv"

# Load every column as plain text: dtype=str prevents numeric casting and
# keep_default_na=False keeps empty cells as "" instead of NaN.
# NOTE(review): latin1 decodes any byte sequence without raising, so a file that
# is really UTF-8 would load with mangled non-ASCII characters -- confirm the encoding.
df = pd.read_csv(
    infile,
    encoding="latin1",
    dtype=str,
    keep_default_na=False,
)

# Base URL of the uploads directory (no trailing slash). This is the single
# source of truth: the regex below is derived from it.
base_literal = "http://www.shinodadesigncenter.net/wordpress/wp-content/uploads"
# re.escape makes every regex metacharacter in the URL (e.g. '.') match
# literally, so the literal and the regex can never drift apart.
base_regex   = re.escape(base_literal)

# Match: (base)/rest  -> we will turn it into (base)_{rest with slashes -> underscores}
# Group 1 is the base URL; group 2 is everything after the following '/',
# stopping at whitespace, a quote, an angle bracket, ')' or ']'.
pattern = re.compile(rf"({base_regex})/([^\s\"'<>)\]]+)")

def _repl(m):
    """Build the replacement text for one match of ``pattern``.

    Parameters
    ----------
    m : re.Match
        Match whose group 1 is the uploads base URL and whose group 2 is the
        path that followed it.

    Returns
    -------
    str
        The base URL, an underscore, then the path with every '/' turned
        into '_'. The base URL itself is left untouched.
    """
    base = m.group(1)               # the uploads base (without trailing slash)
    rest = m.group(2).replace("/", "_")  # replace only in the tail
    return f"{base}_{rest}"

def fix_text(val: str):
    """Rewrite every uploads URL inside ``val`` to its flattened form.

    Parameters
    ----------
    val : str
        Cell content. Anything that is not a non-empty string is returned
        unchanged.

    Returns
    -------
    The input with each ``<base>/a/b/c`` occurrence replaced by
    ``<base>_a_b_c``; other text is preserved as-is.
    """
    if isinstance(val, str) and val:
        # Idempotent: an already-converted URL has '_' right after the base,
        # and the pattern requires '/', so it is not matched a second time.
        return pattern.sub(_repl, val)
    return val

# Run the URL fix over every column; all of them were loaded as text, so
# descriptions, titles, etc. are covered alike.
for column_name in df.columns:
    df[column_name] = df[column_name].map(fix_text)

# Write the result as UTF-8 without the index column
df.to_csv(outfile, encoding="utf-8", index=False)
print("✅ Done. Updated file saved as", outfile)


✅ Done. Updated file saved as output.csv


In [7]:
import pandas as pd
import re

# --- SETTINGS ---
infile, outfile = "input.csv", "output.csv"

# Read the CSV with every column as text; keep_default_na=False leaves empty
# cells as "" rather than NaN.
# NOTE(review): latin1 never fails to decode, so a UTF-8 input would load with
# mangled non-ASCII characters -- confirm the source encoding.
df = pd.read_csv(
    infile,
    encoding="latin1",
    dtype=str,
    keep_default_na=False,
)

# Regex for the uploads base URL (dots escaped so they match literally)
base_regex = r"http://www\.shinodadesigncenter\.net/wordpress/wp-content/uploads"
# Group 1: the base URL. Group 2: everything after the following '/',
# up to whitespace, a quote, an angle bracket, ')' or ']'.
pattern = re.compile(rf"({base_regex})/([^\s\"'<>)\]]+)")

def _repl(m):
    """Return the matched URL with its path flattened.

    ``m`` is a match of ``pattern``: the base URL (group 1) is kept as-is and
    joined by '_' to the path (group 2) with every '/' replaced by '_'.
    """
    prefix, tail = m.groups()
    return prefix + "_" + tail.replace("/", "_")

def fix_text(val: str):
    """Flatten every uploads URL found in ``val``.

    Values that are not non-empty strings are handed back unchanged.
    """
    if not isinstance(val, str) or not val:
        return val
    return pattern.sub(_repl, val)

# Run the URL fix over every column (all were read as text)
for column_name in df.columns:
    df[column_name] = df[column_name].map(fix_text)

# --- ADD HANDLE COLUMN ---
# Row-number handles ("product-1", "product-2", ...), one per row in order.
# Used for the whole column when there is no "Title" column, and per row
# when a title produces an empty slug.
row_handles = ["product-" + str(i + 1) for i in range(len(df))]

if "Title" in df.columns:
    # Slugify the title: lowercase, collapse each run of characters other
    # than a-z / 0-9 into a single '-', then trim dashes from both ends.
    slugs = (
        df["Title"]
        .str.lower()
        .str.replace(r"[^a-z0-9]+", "-", regex=True)
        .str.strip("-")
    )
    # A blank or punctuation-only title slugifies to "", which would leave
    # the row with an empty Handle; fall back to the row-number handle there.
    # NOTE(review): identical titles still yield identical handles -- confirm
    # whether that is intended for this export.
    df["Handle"] = [slug or fallback for slug, fallback in zip(slugs, row_handles)]
else:
    # If no title, just use row numbers
    df["Handle"] = row_handles

# Write the updated table (now including "Handle") as UTF-8, without the index column
df.to_csv(
    outfile,
    encoding="utf-8",
    index=False,
)
print("✅ Done. Updated file with 'Handle' column saved as", outfile)


✅ Done. Updated file with 'Handle' column saved as output.csv
