# Функция, выводящая общую информацию о таблице с графиком пропусков

In [21]:
import pandas as pd

In [None]:
Создаём тестовые датафреймы для проверки функции.

In [22]:
# Three small frames used to exercise the overview helpers below.
# "Score" mixes floats with a string, so pandas stores it as an object column.
d1 = pd.DataFrame(
    data={
        "Andromeda": [4, 69, 8, 4],
        "Score": [0.2566, 0.9988, "unknow", 0.2],
    }
)

# Single-column integer frames of different lengths.
d2 = pd.DataFrame(data={"Voyager": [434, 34, 69, 8, 4, 8, 8]})
d3 = pd.DataFrame(data={"Cassiopeia": [4, 9, 7, 5, 556, 6, 0, 5]})

In [None]:
Сама функция.

In [23]:
def get_info_dataframe(dataframe,
                       transpose=False,
                       count_rows=5
                       ):
    """
    Display a quick overview of a dataframe.

    Shows, in this order:
      1. the first `count_rows` rows of the dataframe;
      2. a one-row table with the number of rows, columns and duplicated rows;
      3. a styled per-column table with the share of missing values (percent,
         with a bar), the count of missing values and the column dtype.
    A separator line is printed at the end.

    `dataframe` - the dataframe to inspect,
    `transpose` - show the preview of the first rows transposed,
    `count_rows` - number of rows shown in the preview (default 5).

    Returns None: all output goes through `display` and `print`.
    `display` is not imported here; it is expected to be provided by the
    notebook (IPython/Jupyter) environment.
    """

    # ----------------- missing-values table ---------------------

    total_rows = len(dataframe)

    # number of missing values per column
    count_missing = dataframe.isna().sum()

    # Share of missing values in percent. A frame without rows would give
    # 0 / 0 = NaN (rendered as "nan %"), so report 0 % in that case.
    if total_rows:
        missing_percent = count_missing / total_rows * 100
    else:
        missing_percent = count_missing.astype(float)

    df = pd.DataFrame(data={'missing': missing_percent,
                            'count_missing': count_missing
                            }
                      )

    # attach the dtype of every column
    dt = pd.DataFrame(data={'type': dataframe.dtypes})
    df = df.join(dt)

    # columns with the fewest missing values come first
    df = df.sort_values('missing', ascending=True)

    dict_color = {
        "int64": "#3399ff",
        "float64": "#3399ff",
        "object": "magenta"
    }

    def color(x):
        # Text colour for a dtype cell. Dtypes that are not in the mapping
        # get no style at all instead of the invalid CSS "color: None".
        css_color = dict_color.get(str(x))
        return f"color: {css_color}" if css_color else ""

    table_missing = (df.style

                     # highlight the hovered row
                     .set_table_styles([{
                         'selector': 'tr:hover',
                         'props': [('background-color', '#222222'), ('color', '#fff')]
                     }])
                     # cells without missing values are printed in bold green
                     .map(lambda x: ""
                          if x > 0
                          else "color: #32cd32; font-weight:600",
                          subset=["missing", "count_missing"])
                     .map(color,
                          subset="type")
                     # red bar proportional to the share of missing values
                     .bar(subset="missing",
                          vmax=100,
                          height=90,
                          color="#f00",
                          )
                     .set_caption('Пропуски и тип данных')
                     # show the share with two decimals and a percent sign
                     .format(subset="missing",
                             formatter="{:.2f} %")

                     )

    # ----------------- size / duplicates table ------------------

    rows, columns = dataframe.shape

    # number of rows flagged by DataFrame.duplicated()
    dublicat = dataframe.duplicated().sum()

    table_shape_duplicat = pd.DataFrame({"rows": [rows],
                                         "columns": [columns],
                                         "duplicates": [dublicat]})

    # red background when duplicates exist, green otherwise
    table_shape_duplicat = (table_shape_duplicat.style

                            .map(lambda x: "background-color: #ff0000; color:black; font-weight:600"
                                 if x > 0
                                 else "background: #32cd32; color:black; font-weight:600",
                                 subset=["duplicates"]).set_caption("Размер"))

    # -------------------- output --------------------------------

    table_first_row = dataframe.head(count_rows)

    if transpose:
        display(table_first_row.T)
    else:
        display(table_first_row)

    display(table_shape_duplicat)

    display(table_missing)

    # separator line
    print("===="*10, "\n")

    # -------------------------------------------------

# Тест

In [24]:
# Smoke test: run the overview on the first test dataframe with default options.
get_info_dataframe(d1)

Unnamed: 0,Andromeda,Score
0,4,0.2566
1,69,0.9988
2,8,unknow
3,4,0.2


Unnamed: 0,rows,columns,duplicates
0,4,2,0


Unnamed: 0,missing,count_missing,type
Andromeda,0.00 %,0,int64
Score,0.00 %,0,object





In [None]:
Реализация в виде класса.

In [26]:
class InfoDataframe():
    """
    Summary tables for a pandas dataframe.

    Wraps a dataframe and exposes, as properties, styled tables describing
    it: `table_missing` (missing values and dtypes per column), `size_table`
    (rows, columns, duplicated rows) and `full_info` (displays both).
    """

    # Text colour per dtype name used in the "type" column.
    _DTYPE_COLORS = {
        "int64": "#3399ff",
        "float64": "#3399ff",
        "object": "magenta"
    }

    def __init__(self, dataframe):
        """Store the dataframe to be described; it is not copied."""
        self.dataframe = dataframe

    @staticmethod
    def _dtype_color(dtype):
        """
        Return the CSS text colour rule for a dtype cell.

        Dtypes missing from `_DTYPE_COLORS` get an empty string (no style)
        instead of the invalid CSS "color: None".
        """
        css_color = InfoDataframe._DTYPE_COLORS.get(str(dtype))
        return f"color: {css_color}" if css_color else ""

    def _missing_frame(self):
        """
        Build the plain (unstyled) missing-values table.

        One row per column of the wrapped dataframe with:
        `missing` - share of missing values in percent,
        `count_missing` - number of missing values,
        `type` - dtype of the column.
        Rows are sorted by `missing` in ascending order.
        """
        total_rows = len(self.dataframe)
        count_missing = self.dataframe.isna().sum()

        # A frame without rows would give 0 / 0 = NaN (rendered as "nan %"),
        # so report 0 % in that case.
        if total_rows:
            missing_percent = count_missing / total_rows * 100
        else:
            missing_percent = count_missing.astype(float)

        df = pd.DataFrame(data={'missing': missing_percent,
                                'count_missing': count_missing
                                }
                          )

        dt = pd.DataFrame(data={'type': self.dataframe.dtypes})
        df = df.join(dt)

        return df.sort_values('missing', ascending=True)

    @property
    def table_missing(self):
        """Return the styled table of missing values and dtypes per column."""

        table_missing = (self._missing_frame()
                         .style
                         # highlight the hovered row
                         .set_table_styles([{
                             'selector': 'tr:hover',
                             'props': [('background-color', '#222222'), ('color', '#fff')]
                         }])
                         # cells without missing values are printed in bold green
                         .map(lambda x: ""
                              if x > 0
                              else "color: #32cd32; font-weight:600",
                              subset=["missing", "count_missing"])
                         .map(self._dtype_color,
                              subset="type")
                         # red bar proportional to the share of missing values
                         .bar(subset="missing",
                              vmax=100,
                              height=90,
                              color="#f00",
                              )
                         .set_caption('Пропуски и тип данных')
                         # show the share with two decimals and a percent sign
                         .format(subset="missing",
                                 formatter="{:.2f} %")

                         )
        return table_missing

    @property
    def size_table(self):
        """Return the styled one-row table with row, column and duplicate counts."""

        rows, columns = self.dataframe.shape

        # number of rows flagged by DataFrame.duplicated()
        dublicat = self.dataframe.duplicated().sum()

        table_shape_duplicat = pd.DataFrame({"rows": [rows],
                                             "columns": [columns],
                                             "duplicates": [dublicat]})

        # red background when duplicates exist, green otherwise
        table_shape_duplicat = (table_shape_duplicat
                                .style
                                .map(lambda x: "background-color: #ff0000; color:black; font-weight:600"
                                     if x > 0
                                     else "background: #32cd32; color:black; font-weight:600",
                                     subset=["duplicates"]).set_caption("Размер"))

        return table_shape_duplicat

    @property
    def full_info(self):
        """
        Display the size table followed by the missing-values table.

        Kept as a property so that `obj.full_info` keeps working; it returns
        None and relies on `display`, which is not imported here and is
        expected to be provided by the notebook environment.
        """
        display(self.size_table)
        display(self.table_missing)

In [27]:
# NOTE(review): `data` is not defined in the visible cells — presumably a
# dataframe loaded elsewhere in the notebook; confirm before running.
df_info = InfoDataframe(data)
# Bare expression on the last line of the cell, so the notebook renders the styled table.
df_info.table_missing


Unnamed: 0,missing,count_missing,type
family,0.00 %,0,object
name,0.00 %,0,object
patronymic,0.00 %,0,object
gender,0.00 %,0,object
year_start,0.00 %,0,object
month_start,0.00 %,0,object
day_start,0.00 %,0,object
year_end,0.00 %,0,object
month_end,0.00 %,0,object
day_end,0.00 %,0,object


In [28]:
# Display name -> dataframe; used to report on several tables in one loop.
q = {
    "таблица 1": d1,
    "таблица 2": d2,
    "таблица 3": d3,
}

In [29]:
# Print a header and show the missing-values table for every named dataframe.
# dict.items() yields the same (name, dataframe) pairs as zipping keys with values.
for name, df in q.items():
    print("-*" * 20)
    print(name.upper())
    t = InfoDataframe(df)
    display(t.table_missing)

-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
ТАБЛИЦА 1


Unnamed: 0,missing,count_missing,type
Andromeda,0.00 %,0,int64
Score,0.00 %,0,object


-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
ТАБЛИЦА 2


Unnamed: 0,missing,count_missing,type
Voyager,0.00 %,0,int64


-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
ТАБЛИЦА 3


Unnamed: 0,missing,count_missing,type
Cassiopeia,0.00 %,0,int64
