In [4]:
import re
import pandas as pd
import os

In [15]:
class InvalidExtension(Exception):
    """Error carrying a "extension is not supported" message.

    The message is built from the offending extension (given without the
    leading dot) and is passed to ``Exception`` as its only argument.
    """

    def __init__(self, extension):
        # The user-facing text is intentionally kept in Russian.
        super().__init__(f'Расширение .{extension} не поддерживается')

In [20]:
def read_file(file_path, separator=';', encoding='cp1251'):
    """Load a tabular file into a pandas DataFrame.

    The reader is chosen from the file extension (case-insensitive):
    ``.xlsx`` goes through ``pd.read_excel`` and ``.csv`` through
    ``pd.read_csv``.

    Args:
        file_path: Path to the file to load.
        separator: Column delimiter, used only for CSV files.
        encoding: Text encoding, used only for CSV files.

    Returns:
        The DataFrame produced by the matching pandas reader.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
        InvalidExtension: If the extension is neither ``xlsx`` nor ``csv``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f'Файл по пути {file_path} не существует')

    # Look at the last path component only, so that a dot in a directory
    # name (e.g. "raw.v2/data") is never mistaken for a file extension.
    filename = os.path.basename(file_path)
    _, dot, suffix = filename.rpartition('.')
    file_extension = suffix.lower() if dot else ''

    if file_extension == 'xlsx':
        return pd.read_excel(file_path)
    if file_extension == 'csv':
        return pd.read_csv(file_path, sep=separator, encoding=encoding)
    raise InvalidExtension(file_extension)

In [60]:
# Load battles.csv as a comma-delimited file into the working DataFrame.
df = read_file('battles.csv', separator=',')

In [63]:
def mem_usage(pandas_obj):
    """Return the deep memory footprint of a pandas object as text.

    Args:
        pandas_obj: A ``pd.DataFrame``; anything else is assumed to be a
            ``pd.Series``.

    Returns:
        The size in megabytes formatted with two decimals, e.g. ``'0.04 MB'``.
    """
    footprint = pandas_obj.memory_usage(deep=True)
    if isinstance(pandas_obj, pd.DataFrame):
        # For a DataFrame the call yields one figure per column (plus the
        # index), so the figures are added up; a Series yields one number.
        footprint = footprint.sum()
    megabytes = footprint / 1024 ** 2  # bytes -> megabytes
    return "{:03.2f} MB".format(megabytes)

In [68]:
# Show the DataFrame's current deep memory footprint as the cell's output.
mem_usage(df)

'0.04 MB'

In [65]:
# Store every float64 column as float32 to halve its memory footprint.
# NOTE: float32 keeps roughly 7 significant digits, so this cast is lossy.
float64_columns = df.select_dtypes(include=["float64"]).columns
for column_name in float64_columns:
    narrowed = df[column_name].astype("float32")
    df[column_name] = narrowed

In [67]:
# Store int64 columns as int32, but only when every value fits; casting an
# out-of-range value to int32 would silently wrap around.
INT32_MIN, INT32_MAX = -2 ** 31, 2 ** 31 - 1
int64_columns = df.select_dtypes(include="int64").columns
for int64_column in int64_columns:
    # A value cannot be above the maximum and below the minimum at once, so
    # the test requires each row to lie within both bounds (inclusive).
    if df[int64_column].between(INT32_MIN, INT32_MAX).all():
        df[int64_column] = df[int64_column].astype('int32')

In [None]:
# Convert low-cardinality object columns to the 'category' dtype: a column
# qualifies when fewer than half of its values are distinct.
object_columns = df.select_dtypes(include="object").columns
for object_column in object_columns:
    num_unique_values = len(df[object_column].unique())
    num_total_values = len(df[object_column])
    # Integer form of "unique / total < 0.5"; it avoids a ZeroDivisionError
    # when the frame has no rows (such columns are simply left unchanged).
    if 2 * num_unique_values < num_total_values:
        df[object_column] = df[object_column].astype('category')