In [2]:
import pandas as pd
import numpy as np

# Sample records with deliberately missing (NaN) entries and mixed Gender
# spellings; this is the raw input for the cleaning steps that follow.
data = dict(
    ID=list(range(1, 11)),
    Age=[25, np.nan, 40, 33, 28, np.nan, 36, 42, 29, 22],
    City=[
        "New York", "Los Angeles", "Chicago", np.nan, "New York",
        "New York", "Chicago", "Los Angeles", "New York", np.nan,
    ],
    Gender=["M", np.nan, "F", "F", "Female", "M", "F", "Male", "F", "Female"],
)

df = pd.DataFrame(data)
print(f"Initial Dataset:\n {df} \n")

# 1. Fill missing Age with mean, and missing City with 'Unknown'
def fill_missing_values(df):
    """Return a copy of ``df`` with missing ``Age`` and ``City`` values filled.

    Missing ``Age`` entries are replaced with the mean of the ``Age`` column
    and missing ``City`` entries with the string ``"Unknown"``. The input
    frame is not modified, so the caller keeps its raw data.

    Args:
        df: DataFrame containing ``Age`` and ``City`` columns.

    Returns:
        A new DataFrame with the same columns and index as ``df``.

    Raises:
        KeyError: If ``Age`` or ``City`` is not a column of ``df``.
    """
    # If every Age is missing the mean itself is NaN, so the gaps stay NaN.
    mean_age = df['Age'].mean()
    # assign() builds a new frame rather than writing into the caller's object.
    return df.assign(
        Age=df['Age'].fillna(mean_age),
        City=df['City'].fillna("Unknown"),
    )

# Run the imputation step, rebind df to its result, and display it.
df = df.pipe(fill_missing_values)
print(f"After Filling Missing Values:\n {df} \n")

# 2. Remove duplicates based on all columns
def remove_duplicates(df):
    """Return ``df`` without rows that repeat an earlier row in every column.

    The first occurrence of each row is kept and the original index labels
    of the surviving rows are preserved.
    """
    return df.drop_duplicates()

# Drop fully duplicated rows, rebind df to the result, and display it.
df = df.pipe(remove_duplicates)
print(f"After Removing Duplicates:\n {df} \n")

# 3. Replace inconsistent values in Gender column
def fix_gender_inconsistencies(df):
    """Return a copy of ``df`` with abbreviated ``Gender`` codes spelled out.

    The exact values ``"M"`` and ``"F"`` become ``"Male"`` and ``"Female"``.
    Every other value, including missing entries, is left as it is. The
    input frame is not modified.

    Args:
        df: DataFrame containing a ``Gender`` column.

    Returns:
        A new DataFrame with the same columns and index as ``df``.

    Raises:
        KeyError: If ``Gender`` is not a column of ``df``.
    """
    canonical_labels = {"M": "Male", "F": "Female"}
    # assign() builds a new frame rather than writing into the caller's object.
    return df.assign(Gender=df['Gender'].replace(canonical_labels))

# Normalise the Gender spellings, rebind df to the result, and display it.
df = df.pipe(fix_gender_inconsistencies)
print(f"After Fixing Gender Inconsistencies:\n {df} \n")

# 4. Group Age into ranges
def group_age_ranges(df, bins=None, labels=None):
    """Return a copy of ``df`` with an ``Age_Group`` column of binned ages.

    Intervals are closed on the left and open on the right, so with the
    default edges an age of exactly 30 lands in ``"30-40"``. Ages below the
    first edge, at or above the last edge, or missing get NaN. The input
    frame is not modified.

    Args:
        df: DataFrame containing a numeric ``Age`` column.
        bins: Optional sequence of increasing bin edges. Defaults to
            ``[18, 30, 40, 50]``.
        labels: Optional labels, one per interval. When ``bins`` is omitted
            this defaults to ``["18-30", "30-40", "40-50"]``. When custom
            ``bins`` are given without labels, ``pd.cut`` generates interval
            labels itself.

    Returns:
        A new DataFrame equal to ``df`` plus a categorical ``Age_Group``
        column.

    Raises:
        KeyError: If ``Age`` is not a column of ``df``.
        ValueError: If the number of labels does not match the number of
            intervals (raised by ``pd.cut``).
    """
    if bins is None:
        bins = [18, 30, 40, 50]
        # The default labels only describe the default edges, so they are
        # applied only when the caller did not supply their own bins.
        if labels is None:
            labels = ["18-30", "30-40", "40-50"]
    age_group = pd.cut(df['Age'], bins=bins, labels=labels, right=False)
    # assign() builds a new frame rather than writing into the caller's object.
    return df.assign(Age_Group=age_group)

# Add the Age_Group column, rebind df to the result, and display it.
df = df.pipe(group_age_ranges)
print(f"After Grouping Age into Ranges:\n {df} \n")

# 5. Convert City column into dummy variables
def create_city_dummies(df):
    """Return ``df`` with one indicator column per distinct ``City`` value.

    The indicator columns are named ``City_<value>`` and are appended to the
    right of the existing columns; the original ``City`` column is kept.
    """
    indicator_columns = pd.get_dummies(df['City'], prefix='City')
    return pd.concat(objs=[df, indicator_columns], axis="columns")

# Append the City indicator columns, rebind df, and display the final frame.
df = df.pipe(create_city_dummies)
print(f"Final Dataset with Dummy Variables:\n {df} \n")


Initial Dataset:
    ID   Age         City  Gender
0   1  25.0     New York       M
1   2   NaN  Los Angeles     NaN
2   3  40.0      Chicago       F
3   4  33.0          NaN       F
4   5  28.0     New York  Female
5   6   NaN     New York       M
6   7  36.0      Chicago       F
7   8  42.0  Los Angeles    Male
8   9  29.0     New York       F
9  10  22.0          NaN  Female 

After Filling Missing Values:
    ID     Age         City  Gender
0   1  25.000     New York       M
1   2  31.875  Los Angeles     NaN
2   3  40.000      Chicago       F
3   4  33.000      Unknown       F
4   5  28.000     New York  Female
5   6  31.875     New York       M
6   7  36.000      Chicago       F
7   8  42.000  Los Angeles    Male
8   9  29.000     New York       F
9  10  22.000      Unknown  Female 

After Removing Duplicates:
    ID     Age         City  Gender
0   1  25.000     New York       M
1   2  31.875  Los Angeles     NaN
2   3  40.000      Chicago       F
3   4  33.000      Unknown     