# In [None]:
import pandas as pd
import ast

# Step 1: Load the CSV. Try UTF-8 first; if the bytes are not valid UTF-8,
# fall back to ISO-8859-1. The path is kept in one place so both attempts
# are guaranteed to read the same file.
csv_path = "/content/Copy of cleaned_.csv"
try:
    df = pd.read_csv(csv_path, encoding='utf-8')
except UnicodeDecodeError:
    df = pd.read_csv(csv_path, encoding='ISO-8859-1')

# Step 2: Normalize column names (trim surrounding whitespace, lower-case)
df.columns = df.columns.str.strip().str.lower()

# Step 3: Print available columns to debug if needed
print("Columns in the file:", df.columns.tolist())

# Step 4: Define column mapping (in case names are slightly off).
# Keys are accepted spellings; values are the canonical names used below.
column_mapping = {
    'description': 'description',
    'ingredients': 'ingredients',
    'calories': 'calories',
    'protein': 'protein (g)',
    'protein (g)': 'protein (g)',
    'fat': 'fat (g)',
    'fat (g)': 'fat (g)',
    'carbs': 'carbs (g)',
    'carbohydrates': 'carbs (g)',
    'carbs (g)': 'carbs (g)'
}

# Step 5: Apply column renaming
df = df.rename(columns=column_mapping)

# Several source columns can map to the same canonical name (e.g. both
# 'carbs' and 'carbohydrates'), and normalization in Step 2 can also collapse
# names. Duplicate labels would make df[col] return a DataFrame instead of a
# Series, so keep only the first occurrence of each label.
duplicate_mask = df.columns.duplicated()
if duplicate_mask.any():
    print("Warning: dropping duplicate columns:",
          df.columns[duplicate_mask].tolist())
    df = df.loc[:, ~duplicate_mask]

# Step 6: Keep only the required columns that are actually present
required_cols = ['description', 'ingredients', 'calories', 'protein (g)', 'fat (g)', 'carbs (g)']
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
    print("Warning: expected columns not found:", missing_cols)
# .copy() gives an independent frame, so the column assignments that follow
# do not operate on a slice of the original (avoids SettingWithCopyWarning).
df = df[[col for col in required_cols if col in df.columns]].copy()

# Step 7: Clean description and ingredients
# Guarded because Step 6 tolerates a missing column. Missing values are
# replaced with '' first; otherwise astype(str) would turn them into the
# literal text "nan".
if 'description' in df.columns:
    df['description'] = df['description'].fillna('').astype(str).str.strip()

def parse_ingredients(val: object) -> list:
    """Convert one raw ``ingredients`` cell into a list.

    Args:
        val: The cell value. Usually a string holding a Python-style list
            literal (e.g. ``"['salt', 'pepper']"``), but it may also be free
            text, an already-parsed list/tuple, or a missing value such as
            ``None`` or NaN.

    Returns:
        * a list copy of ``val`` when it is already a list or tuple;
        * the parsed items when the string is a list or tuple literal;
        * ``[text]`` (the whitespace-stripped string) when the string is not
          a list/tuple literal, including strings that parse to some other
          literal such as a number;
        * ``[]`` for blank strings and for any non-string, non-sequence value.
    """
    # Already parsed (e.g. the cell was processed before): keep the items.
    if isinstance(val, (list, tuple)):
        return list(val)
    # Missing values (None, NaN) and other non-strings carry no ingredients.
    if not isinstance(val, str):
        return []

    text = val.strip()
    if not text:
        return []

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the errors literal_eval is documented to raise for
        # malformed input; anything else (e.g. KeyboardInterrupt) propagates.
        return [text]

    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    # A valid literal that is not a sequence (e.g. "123") is treated as a
    # single free-text ingredient so the column always holds lists.
    return [text]

# Parse each ingredients cell into a list. Guarded because the column
# selection earlier in the script keeps only the columns that exist in the file.
if 'ingredients' in df.columns:
    df['ingredients'] = df['ingredients'].apply(parse_ingredients)

# Step 8: Clean numeric columns
# Only the nutrition columns actually present are processed, so a file that
# lacks one of them does not raise KeyError. Values that cannot be parsed as
# numbers become NaN (errors='coerce') and are then replaced with 0.
numeric_cols = [
    col
    for col in ['calories', 'protein (g)', 'fat (g)', 'carbs (g)']
    if col in df.columns
]
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

# Step 9: Round numeric values to two decimal places
df = df.round({col: 2 for col in numeric_cols})

# Step 10: Save cleaned CSV (written to the current working directory)
df.to_csv("cleaned_recipes.csv", index=False)

# Show sample
print(df.head())
