In [47]:
import re
from typing import List

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy.stats import ttest_1samp
from scipy.stats import ttest_ind
from scipy.stats import wilcoxon
from scipy.stats import mannwhitneyu
from sklearn.utils import resample

### Comparison: Before/After Cython Optimization

In [48]:
import efficiency.py_efficiency as ef
import final_func as fn

In [49]:
# Read the three raw CSV tables (pit stops, results, status) that the
# benchmarked functions take as input.
pit, results, status = map(
    pd.read_csv,
    ('data/pit_stops.csv', 'data/results.csv', 'data/status.csv'),
)

### 1. function: merge_data()

In [50]:
# Before: time `fn.merge_data` (final_func) on the three raw frames.
# -r 100 -n 1 => 100 repeats of a single loop each.
%timeit -r 100 -n 1 fn.merge_data([pit, results, status])

15.6 ms ± 1.84 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [51]:
# After: time `ef.merge_data` (efficiency.py_efficiency) on the same inputs,
# with the same repeat/loop settings as the "Before" run.
%timeit -r 100 -n 1 ef.merge_data([pit, results, status])

14.9 ms ± 695 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)


### 2. function: process_data()

In [52]:
# Build the merged frame once with `fn.merge_data`; it is the shared input
# for the process_data timings that follow.
merge_df = fn.merge_data([pit, results, status])

In [53]:
# After: this cell times `ef.process_data`; `ef` is the module labelled
# "After" in the merge_data comparison.
# NOTE(review): assumes `ef` (efficiency.py_efficiency) is the optimized
# build, as in the merge_data cells -- confirm.
%timeit -r 100 -n 1 ef.process_data(merge_df)

187 ms ± 14.9 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [54]:
# Before: this cell times `fn.process_data`; `fn` is the module labelled
# "Before" in the merge_data comparison.
# NOTE(review): assumes `fn` (final_func) is the unoptimized baseline,
# as in the merge_data cells -- confirm.
%timeit -r 100 -n 1 fn.process_data(merge_df)

183 ms ± 3.93 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [55]:
# Rebuild from the raw frames instead of feeding `merge_df` back into
# itself: `merge_df = fn.process_data(merge_df)` is not idempotent, so
# re-running the cell would process already-processed data. Deriving the
# value from `pit`, `results` and `status` gives the same result however
# many times (or in whatever order) the cell is executed.
# The name `merge_df` is kept because the cells below read it.
merge_df = fn.process_data(fn.merge_data([pit, results, status]))

### 3. function: pit_stop_group()

In [56]:
# After: this cell times `ef.pit_stop_group` with its default grouping;
# `ef` is the module labelled "After" in the merge_data comparison.
# NOTE(review): assumes `ef` (efficiency.py_efficiency) is the optimized
# build, as in the merge_data cells -- confirm.
%timeit -r 100 -n 1 ef.pit_stop_group(merge_df)

4.78 ms ± 412 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [57]:
# Before: this cell times `fn.pit_stop_group` with its default grouping;
# `fn` is the module labelled "Before" in the merge_data comparison.
# NOTE(review): assumes `fn` (final_func) is the unoptimized baseline,
# as in the merge_data cells -- confirm.
%timeit -r 100 -n 1 fn.pit_stop_group(merge_df)

4.97 ms ± 445 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [58]:
# Before: time `fn.pit_stop_group` (final_func) with by='total_stops'.
# -r 100 -n 1 => 100 repeats of a single loop each.
%timeit -r 100 -n 1 fn.pit_stop_group(merge_df, by='total_stops')

3.88 ms ± 342 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [59]:
# After: time `ef.pit_stop_group` (efficiency.py_efficiency) with
# by='total_stops', using the same input and repeat/loop settings.
%timeit -r 100 -n 1 ef.pit_stop_group(merge_df, by='total_stops')

3.81 ms ± 281 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [60]:
# Keep the by='total_stops' grouping from `fn.pit_stop_group` as `df_group`.
df_group = fn.pit_stop_group(merge_df, by='total_stops')