In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load data: assemble a small in-memory customer table.
# np.nan marks the entries that are deliberately missing so the later
# imputation steps have something to fill.
customer_ids = [1, 2, 3, 4, 5, 6]
customer_types = ['New', 'Regular', np.nan, 'Regular', 'New', np.nan]
customer_ages = [25, 40, np.nan, 35, np.nan, 50]

customer_data = pd.DataFrame(
    {
        'Customer_ID': customer_ids,
        'Customer_Type': customer_types,
        'Age': customer_ages,
    }
)

# Show the untouched frame so the missing values are visible before cleaning.
print("Original Customer Data:")
print(customer_data, "\n")

Original Customer Data:
   Customer_ID Customer_Type   Age
0            1           New  25.0
1            2       Regular  40.0
2            3           NaN   NaN
3            4       Regular  35.0
4            5           New   NaN
5            6           NaN  50.0 



In [3]:
# Impute numerical data: replace missing 'Age' entries with the column median.
# The median is computed over the observed ages only (missing values are
# skipped), then written into every gap.
age_series = customer_data['Age']
median_age = age_series.median()
customer_data['Age'] = age_series.fillna(median_age)

# Display the frame again to confirm no 'Age' value is missing any more.
print("After Filling Missing Age Values with Median:")
print(customer_data, "\n")


After Filling Missing Age Values with Median:
   Customer_ID Customer_Type   Age
0            1           New  25.0
1            2       Regular  40.0
2            3           NaN  37.5
3            4       Regular  35.0
4            5           New  37.5
5            6           NaN  50.0 



In [4]:
# Treat a missing 'Customer_Type' as its own 'Unknown' category instead of
# dropping the affected rows.
categorical_column = 'Customer_Type'
customer_data[categorical_column] = customer_data[categorical_column].fillna('Unknown')

# One-hot encode the categorical column. drop_first=False keeps an indicator
# column for every category, including 'Unknown'.
encoded_data = pd.get_dummies(
    customer_data,
    columns=[categorical_column],
    drop_first=False,
)

# Show the encoded frame with one indicator column per category.
print("After One-Hot Encoding 'Customer_Type':")
print(encoded_data, "\n")

After One-Hot Encoding 'Customer_Type':
   Customer_ID   Age  Customer_Type_New  Customer_Type_Regular  \
0            1  25.0               True                  False   
1            2  40.0              False                   True   
2            3  37.5              False                  False   
3            4  35.0              False                   True   
4            5  37.5               True                  False   
5            6  50.0              False                  False   

   Customer_Type_Unknown  
0                  False  
1                  False  
2                   True  
3                  False  
4                  False  
5                   True   



In [5]:
# Final display: heading followed by the fully imputed, one-hot encoded frame.
# A single print with a newline separator emits the same text as two calls.
print("✅ Final Dataset Ready for Machine Learning:", encoded_data, sep="\n")

✅ Final Dataset Ready for Machine Learning:
   Customer_ID   Age  Customer_Type_New  Customer_Type_Regular  \
0            1  25.0               True                  False   
1            2  40.0              False                   True   
2            3  37.5              False                  False   
3            4  35.0              False                   True   
4            5  37.5               True                  False   
5            6  50.0              False                  False   

   Customer_Type_Unknown  
0                  False  
1                  False  
2                   True  
3                  False  
4                  False  
5                   True  
