In [1]:
import pandas as pd
import numpy as np 

In [None]:
# Toy customer-feedback table, written one record per row so each customer's
# values can be read across. np.nan marks a missing entry: two in 'Rating'
# and two in 'Feedback_Text'.
feedback_columns = ['CustomerID', 'Product_ID', 'Rating', 'Feedback_Text',
                    'Purchase_Amount', 'Region']
feedback_records = [
    (101, 'P001', 5,      'Great product!', 120.50, 'East'),
    (102, 'P002', 4,      'Love it!',        50.00, 'West'),
    (103, 'P001', np.nan, 'Satisfied',       85.20, 'East'),
    (104, 'P003', 3,      np.nan,           200.00, 'North'),
    (105, 'P002', 5,      'Excellent!',      75.00, 'South'),
    (106, 'P001', np.nan, np.nan,           150.00, 'West'),
]

# Transpose the records into the column-name -> list-of-values mapping that
# the DataFrame is built from; dict order fixes the column order.
customer_feedback_data = {
    column: list(values)
    for column, values in zip(feedback_columns, zip(*feedback_records))
}
df_feedback = pd.DataFrame(customer_feedback_data)

In [3]:
# Show the untouched frame first, then the heading for its schema summary.
# The three pieces are joined with newlines, which prints the same text as
# three consecutive print() calls.
print(
    "Original Customer Feedback Data (with missing values):",
    df_feedback,
    "\nOriginal Data Info:",
    sep="\n",
)
# info() prints its own report (index range, per-column non-null counts, dtypes).
df_feedback.info()

Original Customer Feedback Data (with missing values):
   CustomerID Product_ID  Rating   Feedback_Text  Purchase_Amount Region
0         101       P001     5.0  Great product!            120.5   East
1         102       P002     4.0        Love it!             50.0   West
2         103       P001     NaN       Satisfied             85.2   East
3         104       P003     3.0             NaN            200.0  North
4         105       P002     5.0      Excellent!             75.0  South
5         106       P001     NaN             NaN            150.0   West

Original Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CustomerID       6 non-null      int64  
 1   Product_ID       6 non-null      object 
 2   Rating           4 non-null      float64
 3   Feedback_Text    4 non-null      object 
 4   Purchase_Amount  6 non-null     

In [4]:
# Cell-by-cell mask with the same shape as df_feedback: True wherever the
# value is missing. isna() is the same method as isnull().
missing_mask = df_feedback.isna()

print("Boolean DataFrame showing missing values (True = NaN):", missing_mask, sep="\n")

Boolean DataFrame showing missing values (True = NaN):
   CustomerID  Product_ID  Rating  Feedback_Text  Purchase_Amount  Region
0       False       False   False          False            False   False
1       False       False   False          False            False   False
2       False       False    True          False            False   False
3       False       False   False           True            False   False
4       False       False   False          False            False   False
5       False       False    True           True            False   False


In [5]:
# Summing the boolean mask column by column counts the NaNs in each column
# (True counts as 1).
missing_per_column = df_feedback.isna().sum()

print("\nNumber of missing values per column:", missing_per_column, sep="\n")


Number of missing values per column:
CustomerID         0
Product_ID         0
Rating             2
Feedback_Text      2
Purchase_Amount    0
Region             0
dtype: int64


In [6]:
# Grand total of missing cells: the first sum() counts NaNs per column,
# the second adds those per-column counts together.
total_missing = df_feedback.isna().sum().sum()

# The message is kept on one line on purpose. A backslash continuation placed
# *inside* a string literal keeps the next line's leading indentation as part
# of the text, which printed a run of stray spaces before the number.
print(f"\nTotal missing values in the DataFrame: {total_missing}")


Total missing values in the DataFrame:      4


In [7]:
# Listwise deletion: keep only rows with no missing value in any column.
# axis="index" / how="any" are dropna()'s defaults, spelled out for the reader.
# dropna() returns a new frame; df_feedback is left as it was.
df_dropped_any_row = df_feedback.dropna(axis="index", how="any")

print(
    "\nDataFrame after dropping any row with a missing value:",
    df_dropped_any_row,
    "\nInfo after dropping any row:",
    sep="\n",
)
# The surviving rows keep their original index labels, so info() reports an
# Index rather than a RangeIndex.
df_dropped_any_row.info()


DataFrame after dropping any row with a missing value:
   CustomerID Product_ID  Rating   Feedback_Text  Purchase_Amount Region
0         101       P001     5.0  Great product!            120.5   East
1         102       P002     4.0        Love it!             50.0   West
4         105       P002     5.0      Excellent!             75.0  South

Info after dropping any row:
<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 0 to 4
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CustomerID       3 non-null      int64  
 1   Product_ID       3 non-null      object 
 2   Rating           3 non-null      float64
 3   Feedback_Text    3 non-null      object 
 4   Purchase_Amount  3 non-null      float64
 5   Region           3 non-null      object 
dtypes: float64(2), int64(1), object(3)
memory usage: 168.0+ bytes


In [8]:
# Column-wise deletion: discard every column that contains at least one NaN
# (here 'Rating' and 'Feedback_Text'); all six rows are kept.
# how="any" is dropna()'s default, spelled out for the reader.
df_dropped_any_col = df_feedback.dropna(axis="columns", how="any")

print(
    "\nDataFrame after dropping any column with a missing value:",
    df_dropped_any_col,
    "\nInfo after dropping any column:",
    sep="\n",
)
df_dropped_any_col.info()


DataFrame after dropping any column with a missing value:
   CustomerID Product_ID  Purchase_Amount Region
0         101       P001            120.5   East
1         102       P002             50.0   West
2         103       P001             85.2   East
3         104       P003            200.0  North
4         105       P002             75.0  South
5         106       P001            150.0   West

Info after dropping any column:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CustomerID       6 non-null      int64  
 1   Product_ID       6 non-null      object 
 2   Purchase_Amount  6 non-null      float64
 3   Region           6 non-null      object 
dtypes: float64(1), int64(1), object(2)
memory usage: 324.0+ bytes


In [10]:
# Mean imputation for 'Rating'. The average is taken over the observed
# ratings only, because Series.mean() skips NaN by default.
mean_rating = df_feedback['Rating'].mean()

# assign() returns a new frame with 'Rating' replaced in place (same column
# position), so df_feedback itself keeps its NaNs.
df_filled_mean = df_feedback.assign(
    Rating=df_feedback['Rating'].fillna(mean_rating)
)

print(
    "\nDataFrame after filling 'Rating' NaNs with the mean:",
    df_filled_mean,
    "\nInfo after filling 'Rating' with mean:",
    sep="\n",
)
df_filled_mean.info()


DataFrame after filling 'Rating' NaNs with the mean:
   CustomerID Product_ID  Rating   Feedback_Text  Purchase_Amount Region
0         101       P001    5.00  Great product!            120.5   East
1         102       P002    4.00        Love it!             50.0   West
2         103       P001    4.25       Satisfied             85.2   East
3         104       P003    3.00             NaN            200.0  North
4         105       P002    5.00      Excellent!             75.0  South
5         106       P001    4.25             NaN            150.0   West

Info after filling 'Rating' with mean:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CustomerID       6 non-null      int64  
 1   Product_ID       6 non-null      object 
 2   Rating           6 non-null      float64
 3   Feedback_Text    4 non-null      object 
 4   Purchase_Amoun

In [None]:
# Constant imputation for the free-text column: missing feedback becomes an
# explicit placeholder string. assign() builds a separate frame, so
# df_feedback is not modified and 'Rating' keeps its NaNs here.
df_filled_constant = df_feedback.assign(
    Feedback_Text=lambda frame: frame['Feedback_Text'].fillna('No Feedback Provided')
)

print(
    "\nDataFrame after filling 'Feedback_Text' NaNs with a constant string:",
    df_filled_constant,
    "\nInfo after filling 'Feedback_Text' with constant:",
    sep="\n",
)
df_filled_constant.info()


DataFrame after filling 'Feedback_Text' NaNs with a constant string:
   CustomerID Product_ID  Rating         Feedback_Text  Purchase_Amount Region
0         101       P001     5.0        Great product!            120.5   East
1         102       P002     4.0              Love it!             50.0   West
2         103       P001     NaN             Satisfied             85.2   East
3         104       P003     3.0  No Feedback Provided            200.0  North
4         105       P002     5.0            Excellent!             75.0  South
5         106       P001     NaN  No Feedback Provided            150.0   West

Info after filling 'Feedback_Text' with constant:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CustomerID       6 non-null      int64  
 1   Product_ID       6 non-null      object 
 2   Rating           4 non-null      floa