# In [None]:
import pandas as pd

def clean_data(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *raw*; the input frame is left untouched.

    Expects the columns ``Name``, ``Age``, ``Gender`` and ``Score``.

    Steps, in order:
      1. Strip surrounding whitespace from ``Name``.
      2. Map the word ``'thirty'`` to 30 and coerce ``Age`` to numeric;
         anything that still cannot be parsed becomes NaN.
      3. Expand the ``'M'`` / ``'F'`` abbreviations in ``Gender``.
      4. Drop duplicate rows.
      5. Fill missing ``Score`` values with the column mean.
      6. Drop rows whose ``Age`` is missing.
      7. Lowercase all column names.
      8. Add a boolean ``passed`` column (``score > 80``).

    Returns:
        A new DataFrame with lowercase column names plus ``passed``.
    """
    cleaned = raw.copy()

    # Normalise every column *before* de-duplicating: rows that differ only
    # in representation ('F' vs 'Female', 'thirty' vs 30, 'Bob ' vs 'Bob')
    # would otherwise not compare equal and would survive drop_duplicates().
    cleaned['Name'] = cleaned['Name'].str.strip()
    cleaned['Age'] = pd.to_numeric(
        cleaned['Age'].replace('thirty', 30), errors='coerce'
    )
    cleaned['Gender'] = cleaned['Gender'].replace({'M': 'Male', 'F': 'Female'})

    cleaned = cleaned.drop_duplicates()

    # The mean is taken after de-duplication (so repeated rows do not weigh
    # twice) but before rows with a missing age are dropped, so those rows
    # still contribute to the fill value.
    cleaned['Score'] = cleaned['Score'].fillna(cleaned['Score'].mean())

    cleaned = cleaned.dropna(subset=['Age'])

    cleaned.columns = [col.lower() for col in cleaned.columns]
    cleaned['passed'] = cleaned['score'] > 80
    return cleaned


# 🔹 Step 1: Create messy sample data
data = {
    'Name': ['Alice ', ' Bob', 'Charlie', 'David', 'Eve', 'Alice '],
    'Age': ['25', 'thirty', 35, 40, None, '25'],
    'Gender': ['F', 'M', 'M', 'Male', 'Female', 'F'],
    'Score': [85.5, None, 91.2, 88.0, 79.5, 85.5],
}

df = pd.DataFrame(data)
print("🔍 Original Data:")
print(df)

# 🔹 Step 2: Clean the data (see clean_data for the individual steps)
df = clean_data(df)

# 🔹 Step 3: Display cleaned data
print("\n✅ Cleaned Data:")
print(df)

# 🔹 Step 4: Extra insights
print("\n📊 Unique Names:", df['name'].nunique())
print("📊 Unique Genders:", df['gender'].unique())

# 🔹 Step 5: Sort by Age descending
print("\n🔽 Sorted by Age (Descending):")
print(df.sort_values(by='age', ascending=False))
