## Imports

In [1]:
import pandas as pd

## read the fines.csv that you saved in the previous exercise

In [2]:
df = pd.read_csv('../data/fines.csv')
df.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2,3200.0,Ford,Focus,1989
1,E432XX77RUS,1,6500.0,Toyota,Camry,1995
2,7184TT36RUS,1,2100.0,Ford,Focus,1984
3,X582HE161RUS,2,2000.0,Ford,Focus,2015
4,92918M178RUS,1,5700.0,Ford,Focus,2014


## iterations: in all the following subtasks, you need to calculate fines/refund*year for each row and create a new column with the calculated data and measure the time using the magic command %%timeit in the cell

In [3]:
def calc_loop():
  new_col = []
  for i in range(0, len(df)):
      new_col.append(df.iloc[i]['Fines'] / (df.iloc[i]
                     ['Refund'] * df.iloc[i]['Year']))
  df['Fines / Refund * Year'] = new_col

In [4]:
%%time
calc_loop()

CPU times: user 388 ms, sys: 3.14 ms, total: 391 ms
Wall time: 388 ms


In [5]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [6]:
def calc_iterrows():
  new_col = []
  for index, row in df.iterrows():
      new_col.append(row['Fines'] / (row['Refund'] * row['Year']))
  df['Fines / Refund * Year'] = new_col

In [7]:
%%time
calc_iterrows()

CPU times: user 71.9 ms, sys: 0 ns, total: 71.9 ms
Wall time: 72.1 ms


In [8]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [9]:
def calc_apply():
  df['Fines / Refund * Year'] = df.apply(
      lambda row: row['Fines'] / (row['Refund'] * row['Year']), axis=1)

In [10]:
%%time
calc_apply()

CPU times: user 49.6 ms, sys: 245 µs, total: 49.8 ms
Wall time: 48 ms


In [11]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [12]:
def calc_Series():
  df['Fines / Refund * Year'] = df['Fines'] / (df['Refund'] * df['Year'])

In [13]:
%%time
calc_Series()

CPU times: user 4.1 ms, sys: 13 µs, total: 4.11 ms
Wall time: 3.78 ms


In [14]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [15]:
def calc_values():
  df['Fines / Refund * Year'] = df['Fines'].values / \
    (df['Refund'].values * df['Year'].values)

In [16]:
%%time
calc_values()

CPU times: user 2.56 ms, sys: 468 µs, total: 3.03 ms
Wall time: 2.64 ms


In [17]:
df = df.drop(['Fines / Refund * Year'], axis=1)

## indexing: measure the time using the magic command %%timeit in the cell

In [80]:
%%time
df.loc[df['CarNumber'] == 'O136HO197RUS']

CPU times: user 1.07 ms, sys: 1.07 ms, total: 2.14 ms
Wall time: 2.17 ms


Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
715,O136HO197RUS,2,7800.0,Toyota,Corolla,1989
902,O136HO197RUS,2,7800.0,Toyota,Corolla,1997


In [81]:
df = df.set_index('CarNumber')
df.head()

Unnamed: 0_level_0,Refund,Fines,Make,Model,Year
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Y163O8161RUS,2,3200.0,Ford,Focus,1989
E432XX77RUS,1,6500.0,Toyota,Camry,1995
7184TT36RUS,1,2100.0,Ford,Focus,1984
X582HE161RUS,2,2000.0,Ford,Focus,2015
92918M178RUS,1,5700.0,Ford,Focus,2014


In [82]:
%%time
df.loc['O136HO197RUS']

CPU times: user 2.09 ms, sys: 0 ns, total: 2.09 ms
Wall time: 8.51 ms


Unnamed: 0_level_0,Refund,Fines,Make,Model,Year
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
O136HO197RUS,2,7800.0,Toyota,Corolla,1989
O136HO197RUS,2,7800.0,Toyota,Corolla,1997


## downcasting:

In [83]:
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, Y163O8161RUS to C803EY62RUS
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Refund  930 non-null    int64  
 1   Fines   930 non-null    float64
 2   Make    930 non-null    object 
 3   Model   919 non-null    object 
 4   Year    930 non-null    int64  
dtypes: float64(1), int64(2), object(2)
memory usage: 228.7 KB


In [84]:
optimized = df.copy()

In [85]:
optimized['Fines'] = pd.to_numeric(optimized['Fines'], downcast='float')
optimized['Refund'] = pd.to_numeric(optimized['Refund'], downcast='integer')
optimized['Year'] = pd.to_numeric(optimized['Year'], downcast='integer')

In [86]:
optimized.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, Y163O8161RUS to C803EY62RUS
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Refund  930 non-null    int8   
 1   Fines   930 non-null    float32
 2   Make    930 non-null    object 
 3   Model   919 non-null    object 
 4   Year    930 non-null    int16  
dtypes: float32(1), int16(1), int8(1), object(2)
memory usage: 213.3 KB


## categories:

In [87]:
optimized = optimized.reset_index()
optimized['CarNumber'] = optimized['Make'].astype('category')
optimized['Make'] = optimized['Make'].astype('category')
optimized['Model'] = optimized['Model'].astype('category')

In [88]:
optimized.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Ford,2,3200.0,Ford,Focus,1989
1,Toyota,1,6500.0,Toyota,Camry,1995
2,Ford,1,2100.0,Ford,Focus,1984
3,Ford,2,2000.0,Ford,Focus,2015
4,Ford,1,5700.0,Ford,Focus,2014


In [89]:
optimized.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   CarNumber  930 non-null    category
 1   Refund     930 non-null    int8    
 2   Fines      930 non-null    float32 
 3   Make       930 non-null    category
 4   Model      919 non-null    category
 5   Year       930 non-null    int16   
dtypes: category(3), float32(1), int16(1), int8(1)
memory usage: 11.5 KB


## memory clean

In [90]:
%reset_selective -f df

In [91]:
df

NameError: ignored