In [1]:
import pandas as pd
import numpy as np


def create_sales_dataframe():
    """Build the small demo sales table used by the rest of the notebook.

    Returns:
        pd.DataFrame: ten transactions with the columns ``transaction_id``,
        ``product``, ``category``, ``price``, ``quantity``,
        ``customer_rating``, ``sale_dt`` and ``region``. The ``price``,
        ``quantity``, ``customer_rating`` and ``region`` columns each contain
        ``np.nan`` gaps.
    """
    transaction_ids = [101, 102, 103, 104, 105, 106, 107, 108, 109, 110]
    products = [
        'Laptop', 'Mouse', 'Keyboard', 'Monitor', 'Webcam',
        'Laptop', 'Headphones', 'Speaker', 'Keyboard', 'Mouse',
    ]
    categories = [
        'Electronics', 'Accessories', 'Accessories', 'Electronics', 'Accessories',
        'Electronics', 'Audio', 'Audio', 'Accessories', 'Accessories',
    ]
    # The numeric columns carry NaN gaps, so pandas stores them as floats.
    prices = [1200, 25, 75, 300, np.nan, 1500, 120, 80, 60, 30]
    quantities = [1, 2, 1, 1, 3, 1, 2, np.nan, 1, 2]
    ratings = [4.5, 4.0, np.nan, 4.8, 3.5, 4.9, 4.2, 3.9, 4.1, np.nan]
    # Dates are kept as plain ISO-formatted strings.
    sale_dates = [
        '2023-01-05', '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-07',
        '2023-01-08', '2023-01-08', '2023-01-09', '2023-01-09', '2023-01-10',
    ]
    regions = [
        'East', 'West', 'North', 'East', 'South',
        'West', 'North', 'East', np.nan, 'South',
    ]

    columns = {
        'transaction_id': transaction_ids,
        'product': products,
        'category': categories,
        'price': prices,
        'quantity': quantities,
        'customer_rating': ratings,
        'sale_dt': sale_dates,
        'region': regions,
    }
    sales = pd.DataFrame(columns)
    return sales


# Build the demo table once; ending the cell on the bare name renders it as the cell output.
df_sales = create_sales_dataframe()
df_sales


Unnamed: 0,transaction_id,product,category,price,quantity,customer_rating,sale_dt,region
0,101,Laptop,Electronics,1200.0,1.0,4.5,2023-01-05,East
1,102,Mouse,Accessories,25.0,2.0,4.0,2023-01-05,West
2,103,Keyboard,Accessories,75.0,1.0,,2023-01-06,North
3,104,Monitor,Electronics,300.0,1.0,4.8,2023-01-07,East
4,105,Webcam,Accessories,,3.0,3.5,2023-01-07,South
5,106,Laptop,Electronics,1500.0,1.0,4.9,2023-01-08,West
6,107,Headphones,Audio,120.0,2.0,4.2,2023-01-08,North
7,108,Speaker,Audio,80.0,,3.9,2023-01-09,East
8,109,Keyboard,Accessories,60.0,1.0,4.1,2023-01-09,
9,110,Mouse,Accessories,30.0,2.0,,2023-01-10,South


In [2]:
def get_missing_data_report(col):
    """Print a short missing-value report for a single column.

    The report shows the column name, the number of NaN values, their share
    of all rows as a percentage rounded to two decimals, and the column
    dtype. The report text is printed in Russian; nothing is returned.

    Args:
        col: pandas Series to inspect. Its ``name`` and ``dtype`` are shown
            in the report.
    """
    row_count = len(col)
    total_missing = col.isna().sum()
    # An empty column has no rows to divide by; report 0% instead of
    # dividing by zero.
    if row_count:
        percent_missing = round((total_missing / row_count) * 100, 2)
    else:
        percent_missing = 0.0
    dtype = col.dtype

    print("Отчет о пропущенных значениях:")
    print(f"Столбец [{col.name}]: NaN значений: [{total_missing}] "
          f"([{percent_missing}%]), Тип данных: [{dtype}]")


In [3]:
# Missing-value report for the `price` column.
get_missing_data_report(df_sales["price"])


Отчет о пропущенных значениях:
Столбец [price]: NaN значений: [1] ([10.0%]), Тип данных: [float64]


In [4]:
# Missing-value report for the `quantity` column.
get_missing_data_report(df_sales["quantity"])


Отчет о пропущенных значениях:
Столбец [quantity]: NaN значений: [1] ([10.0%]), Тип данных: [float64]


In [5]:
# Missing-value report for the `customer_rating` column.
get_missing_data_report(df_sales["customer_rating"])


Отчет о пропущенных значениях:
Столбец [customer_rating]: NaN значений: [2] ([20.0%]), Тип данных: [float64]


In [6]:
# Missing-value report for the `region` column.
get_missing_data_report(df_sales["region"])


Отчет о пропущенных значениях:
Столбец [region]: NaN значений: [1] ([10.0%]), Тип данных: [object]


In [None]:
def apply_fillna_strategies(table):
    """Return a copy of ``table`` with missing values imputed per column.

    Strategies:
        * ``price``: filled with the column median.
        * ``quantity``: filled with ``0``.
        * ``customer_rating``: filled with the column mean rounded to one
          decimal.
        * ``region``: filled with the most frequent value (the first entry
          returned by ``Series.mode()``). Left unchanged when the column has
          no non-null values, since there is no mode to fill with.

    Args:
        table: DataFrame containing the ``price``, ``quantity``,
            ``customer_rating`` and ``region`` columns.

    Returns:
        A new DataFrame with the four columns imputed; the input frame is
        not modified.

    Raises:
        KeyError: if any of the four columns is missing from ``table``.
    """
    # Work on a copy so the caller's frame keeps its original gaps.
    table = table.copy()

    # Fill missing prices with the column median.
    table["price"] = table["price"].fillna(table["price"].median())

    # Fill missing quantities with zero.
    table["quantity"] = table["quantity"].fillna(0)

    # Round the mean to one decimal before using it as the fill value.
    mean_rating = round(table["customer_rating"].mean(), 1)
    table["customer_rating"] = table["customer_rating"].fillna(mean_rating)

    # mode() returns an empty Series when every value is missing, so only
    # fill when a mode exists instead of failing on the lookup.
    region_modes = table["region"].mode()
    if not region_modes.empty:
        table["region"] = table["region"].fillna(region_modes.iloc[0])

    return table
# Impute into a new frame (the function works on a copy of `df_sales`), then display the result.
df_sales_processed = apply_fillna_strategies(df_sales)
df_sales_processed


Unnamed: 0,transaction_id,product,category,price,quantity,customer_rating,sale_dt,region
0,101,Laptop,Electronics,1200.0,1.0,4.5,2023-01-05,East
1,102,Mouse,Accessories,25.0,2.0,4.0,2023-01-05,West
2,103,Keyboard,Accessories,75.0,1.0,4.2,2023-01-06,North
3,104,Monitor,Electronics,300.0,1.0,4.8,2023-01-07,East
4,105,Webcam,Accessories,80.0,3.0,3.5,2023-01-07,South
5,106,Laptop,Electronics,1500.0,1.0,4.9,2023-01-08,West
6,107,Headphones,Audio,120.0,2.0,4.2,2023-01-08,North
7,108,Speaker,Audio,80.0,0.0,3.9,2023-01-09,East
8,109,Keyboard,Accessories,60.0,1.0,4.1,2023-01-09,East
9,110,Mouse,Accessories,30.0,2.0,4.2,2023-01-10,South
