## Чтение файла

In [111]:
import pandas as pd 

# Load the fines CSV into a DataFrame; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/fines.csv")

## Итерации и вычисления (сравнение методов)

## Реализация через цикл for с iloc и append()

In [87]:
%%timeit
def calculate_loop(df):
    """Compute ``(Fines / Refund) * Year`` row by row with a positional loop.

    Every row is looked up by position through ``iloc`` and the resulting
    value is appended to a plain Python list.

    Args:
        df: DataFrame that provides the ``Fines``, ``Refund`` and ``Year`` columns.

    Returns:
        list: one computed value per row, in row order.
    """
    values = []
    row_count = len(df)
    for position in range(row_count):
        # Each column value is fetched through its own positional row lookup.
        ratio = df.iloc[position]['Fines'] / df.iloc[position]['Refund']
        values.append(ratio * df.iloc[position]['Year'])
    return values

df['strange'] = calculate_loop(df)

35.7 ms ± 533 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Реализация через iterrows()

In [88]:
%%timeit
def calculate_iterrows(df):
    """Compute ``(Fines / Refund) * Year`` for each row yielded by ``iterrows()``.

    Args:
        df: DataFrame that provides the ``Fines``, ``Refund`` and ``Year`` columns.

    Returns:
        list: one computed value per row, in iteration order.
    """
    # The index label produced by iterrows() is not needed, only the row itself.
    return [
        (record['Fines'] / record['Refund']) * record['Year']
        for _, record in df.iterrows()
    ]

df['strange'] = calculate_iterrows(df)

11.1 ms ± 129 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Реализация через apply() с lambda

In [89]:
%%timeit
# Row-wise apply: with axis=1 the lambda is called once per row.
df['strange'] = df.apply(
    lambda record: (record['Fines'] / record['Refund']) * record['Year'],
    axis=1,
)

3.08 ms ± 33.5 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Реализация через арифметику Series

In [90]:
%%timeit
# Whole-column arithmetic: the Series are divided and multiplied element-wise in one step.
df['strange'] = df['Fines'].div(df['Refund']).mul(df['Year'])

59.3 μs ± 4.39 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [91]:
%%timeit
def calculate_values(df):
    """Compute ``(Fines / Refund) * Year`` by looping over the columns' ``.values``.

    The three columns are pulled out through ``.values`` once, and the Python
    loop then runs over those arrays instead of over DataFrame rows.

    Args:
        df: DataFrame that provides the ``Fines``, ``Refund`` and ``Year`` columns.

    Returns:
        list: one computed value per row, in row order.
    """
    fines_arr = df['Fines'].values
    refund_arr = df['Refund'].values
    year_arr = df['Year'].values
    # All three arrays come from the same frame, so they have equal length.
    return [
        (fine / refund) * year
        for fine, refund, year in zip(fines_arr, refund_arr, year_arr)
    ]

df['strange'] = calculate_values(df)

334 μs ± 4.33 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Индексация

In [92]:
%%timeit
# Lookup 1: boolean-mask filter on the CarNumber column.
row = df[df["CarNumber"] == "O136HO197RUS"]
# Lookup 2: build a frame indexed by CarNumber, then select by label.
df_indexed = df.set_index("CarNumber")
row = df_indexed.loc["O136HO197RUS"]
# NOTE(review): the enclosing %%timeit measures all three statements together
# (mask lookup + set_index + .loc), so the reported time does not isolate either
# lookup strategy. Time them in separate cells to compare them.

316 μs ± 70.2 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Вывод информации о памяти

In [93]:
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   CarNumber  930 non-null    object 
 1   Refund     930 non-null    int64  
 2   Fines      930 non-null    float64
 3   Make       930 non-null    object 
 4   Model      919 non-null    object 
 5   Year       930 non-null    int64  
 6   strange    930 non-null    float64
dtypes: float64(2), int64(2), object(3)
memory usage: 182.1 KB


## Создаем копию

In [94]:
# Work on a copy so the dtype conversions below are applied to optimized_df, not to df.
optimized_df = df.copy()

## Downcast: преобразование float64 в float32

In [105]:
# Pick the float64 columns and let pd.to_numeric choose a smaller float dtype for each one.
float_cols = optimized_df.select_dtypes(include=['float64']).columns
optimized_df[float_cols] = optimized_df[float_cols].apply(pd.to_numeric, downcast='float')
# Explicitly force 'strange' to float32.
# NOTE(review): presumably added because the downcast above left this column as
# float64 — confirm; unlike the downcast, this cast is unconditional and may lose precision.
optimized_df['strange'] = optimized_df['strange'].astype('float32')

## Downcast: преобразование int64 в наименьший возможный числовой тип

In [96]:
# Pick the int64 columns and let pd.to_numeric choose the smallest integer dtype for each one.
int_cols = optimized_df.select_dtypes(include=['int64']).columns
optimized_df[int_cols] = optimized_df[int_cols].apply(pd.to_numeric, downcast='integer')

## Вывод информации о памяти после downcast

In [106]:
optimized_df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   CarNumber  930 non-null    object 
 1   Refund     930 non-null    int8   
 2   Fines      930 non-null    float32
 3   Make       930 non-null    object 
 4   Model      919 non-null    object 
 5   Year       930 non-null    int16  
 6   strange    930 non-null    float32
dtypes: float32(2), int16(1), int8(1), object(3)
memory usage: 163.0 KB


## Преобразование всех столбцов типа object в category

In [108]:
# Convert every object-dtype column to the category dtype, one column at a time.
object_columns = optimized_df.select_dtypes(include='object').columns
for column_name in object_columns:
    optimized_df[column_name] = optimized_df[column_name].astype('category')

# Report the deep memory footprint after the conversion.
optimized_df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   CarNumber  930 non-null    category
 1   Refund     930 non-null    int8    
 2   Fines      930 non-null    float32 
 3   Make       930 non-null    category
 4   Model      919 non-null    category
 5   Year       930 non-null    int16   
 6   strange    930 non-null    float32 
dtypes: category(3), float32(2), int16(1), int8(1)
memory usage: 63.8 KB


## Очистка памяти

In [109]:
%reset_selective -f df

import gc
gc.collect()

2641