# In [1]:
import pandas as pd
import os

# Step 1: Set file paths
input_file = 'zara.csv'
output_folder = 'cleaned_data'
output_file = os.path.join(output_folder, 'cleaned_zara.csv')


def _read_csv_auto_delimiter(path, delimiters=(',', ';')):
    """Read the CSV at *path*, trying each candidate delimiter in order.

    The first delimiter that yields more than one column wins. If no
    delimiter splits the data into several columns, the first parse that
    succeeded is returned (a genuine single-column file).

    Args:
        path: Filesystem path of the CSV file (it may be read several times).
        delimiters: Candidate field separators, tried in order.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        pandas.errors.ParserError: If every candidate delimiter fails to parse.
        ValueError: If *delimiters* is empty.
        FileNotFoundError: If *path* does not exist (raised by pandas).
    """
    single_column_parse = None
    last_error = None
    for delimiter in delimiters:
        try:
            frame = pd.read_csv(path, delimiter=delimiter)
        except pd.errors.ParserError as err:
            # A wrong delimiter often shows up as ragged rows; try the next.
            last_error = err
            continue
        if frame.shape[1] > 1:
            return frame
        if single_column_parse is None:
            single_column_parse = frame
    if single_column_parse is not None:
        return single_column_parse
    if last_error is not None:
        raise last_error
    raise ValueError("delimiters must not be empty")


def _clean_dataframe(frame):
    """Return a cleaned copy of *frame*; the input is not modified.

    Column names are stripped of surrounding whitespace, columns whose
    (stripped) name starts with ``Unnamed`` are removed, and rows that are
    empty across all remaining columns are dropped.
    """
    # Step 4: Clean column names
    cleaned = frame.rename(columns=str.strip)
    # Step 5: Drop unwanted columns and rows. Columns go first so that rows
    # holding values only in an "Unnamed" column count as empty.
    cleaned = cleaned.loc[:, ~cleaned.columns.str.startswith('Unnamed')]
    return cleaned.dropna(how='all')


def clean_csv(input_file, output_file):
    """Load *input_file*, clean it, and write the result to *output_file*.

    The output directory is created only after the input has been read and
    cleaned, so a failed read does not leave an empty folder behind.

    Args:
        input_file: Path of the CSV to clean (comma- or semicolon-delimited).
        output_file: Path of the cleaned CSV to write (without the index).

    Returns:
        The cleaned ``pandas.DataFrame`` that was written.
    """
    # Step 3: Load the CSV with proper delimiter handling
    cleaned = _clean_dataframe(_read_csv_auto_delimiter(input_file))

    # Step 2: Create the output folder if it doesn't exist
    target_dir = os.path.dirname(output_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Step 6: Save cleaned file
    cleaned.to_csv(output_file, index=False)
    return cleaned


if __name__ == "__main__":
    df = clean_csv(input_file, output_file)
    print(f"✅ Cleaned CSV saved to: {output_file}")


# Output: ✅ Cleaned CSV saved to: cleaned_data\cleaned_zara.csv
