In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np

# Sample records for the exercise: 'Age' and 'Salary' each contain two NaN gaps,
# while 'City' is fully populated
data = dict(
    Age=[25, np.nan, 30, 35, np.nan, 40, 45],
    Salary=[50000, 55000, np.nan, 60000, 65000, np.nan, 70000],
    City=[
        'New York',
        'Los Angeles',
        'Chicago',
        'Houston',
        'Phoenix',
        'Philadelphia',
        'San Antonio',
    ],
)

# Tabular view of the records; the functions in this file read and modify it
df = pd.DataFrame(data)

# Show the original DataFrame with missing values
def display_original_data():
    """Print the module-level DataFrame, then its boolean missing-value mask."""
    sections = (
        ("Original DataFrame with Missing Values:", df),
        ("\nMissing values (True = Missing, False = Not Missing):", df.isnull()),
    )
    # Each section is a heading line followed by the table it describes
    for heading, table in sections:
        print(heading)
        print(table)

# Function to fill missing values in 'Age' and 'Salary' columns
def fill_missing_values(method='mean', *, columns=('Age', 'Salary'), frame=None):
    """Fill missing values in the given columns with a per-column statistic.

    Args:
        method: One of 'mean', 'median' or 'mode'. Letter case and
            surrounding whitespace are ignored.
        columns: Labels of the columns to fill. Defaults to 'Age' and
            'Salary'.
        frame: DataFrame to modify in place. When omitted, the module-level
            ``df`` is used.

    Returns:
        None. The selected columns of the target frame are reassigned in
        place. An unrecognised method prints a hint and changes nothing.
    """
    choice = method.strip().lower() if isinstance(method, str) else method
    if choice not in ('mean', 'median', 'mode'):
        print("Invalid method! Please choose from 'mean', 'median', or 'mode'.")
        return

    target = df if frame is None else frame
    for column in columns:
        series = target[column]
        if choice == 'mode':
            modes = series.mode()
            # mode() is empty when the column has no observed values;
            # indexing it would raise, so leave such a column untouched
            if modes.empty:
                continue
            # With ties, mode() is sorted; take the first (smallest) value
            fill_value = modes.iloc[0]
        elif choice == 'mean':
            fill_value = series.mean()
        else:
            fill_value = series.median()
        target[column] = series.fillna(fill_value)

# Provide feedback based on missing values
def provide_feedback():
    """Print a success or failure message based on remaining gaps in ``df``."""
    # Total count of missing cells across every column of the DataFrame
    remaining_gaps = df.isnull().sum().sum()
    if remaining_gaps != 0:
        print("\n❌ Some missing values are still present. Make sure you have filled all missing values correctly.")
        return
    print("\n✅ Well done! All missing values have been filled.")

# Interactive challenge instructions
def interactive_challenge():
    """Run the interactive data-cleaning exercise from start to finish.

    Prints the task description and the original data, prompts on standard
    input for a fill method, applies it with ``fill_missing_values``, then
    prints the resulting DataFrame and the feedback message.
    """
    print("Welcome to the Data Cleaning Challenge!")
    print("\nTask: Your goal is to fill the missing values in the 'Age' and 'Salary' columns.")
    print("You can fill missing values using one of the following methods: 'mean', 'median', or 'mode'.")

    # Show the original data
    display_original_data()

    # Ask the user to choose a method. Strip as well as lower-case the answer
    # so that stray whitespace (e.g. "mean ") does not make a valid choice fail.
    method = input("\nEnter the method to fill missing values ('mean', 'median', 'mode'): ").strip().lower()

    # Fill missing values based on the user's choice
    fill_missing_values(method)

    # Display the cleaned data and provide feedback
    print("\nDataFrame after cleaning:")
    print(df)
    provide_feedback()

# Start the interactive challenge only when this code is executed directly
# (as a script or in a notebook cell), so that importing it does not block
# on input()
if __name__ == "__main__":
    interactive_challenge()


Welcome to the Data Cleaning Challenge!

Task: Your goal is to fill the missing values in the 'Age' and 'Salary' columns.
You can fill missing values using one of the following methods: 'mean', 'median', or 'mode'.
Original DataFrame with Missing Values:
    Age   Salary          City
0  25.0  50000.0      New York
1   NaN  55000.0   Los Angeles
2  30.0      NaN       Chicago
3  35.0  60000.0       Houston
4   NaN  65000.0       Phoenix
5  40.0      NaN  Philadelphia
6  45.0  70000.0   San Antonio

Missing values (True = Missing, False = Not Missing):
     Age  Salary   City
0  False   False  False
1   True   False  False
2  False    True  False
3  False   False  False
4   True   False  False
5  False    True  False
6  False   False  False
