# Speed up your Pandas Code!

In [37]:
import matplotlib
import pandas as pd
import numpy as np

In [38]:
# Number of synthetic people (rows) to generate.
size = 10_000

# Build the demo frame in one shot; the dict preserves both column order and
# the order in which the random draws are made.
df = pd.DataFrame(
    {
        # integer ages drawn from [0, 100)
        'age': np.random.randint(0, 100, size),
        # whole hours in bed drawn from [0, 9)
        'time_in_bed': np.random.randint(0, 9, size),
        # fraction of the time spent asleep, uniform on [0, 1)
        'pct_sleeping': np.random.rand(size),
        # one label picked uniformly from each fixed list
        'favorite_food': np.random.choice(['pizza', 'toco', 'ice-cream'], size),
        'hate_food': np.random.choice(['broccoli', 'canday corn', 'eggs'], size),
    }
)

In [39]:
# Show the generated frame to eyeball the five columns.
df

Unnamed: 0,age,time_in_bed,pct_sleeping,favorite_food,hate_food
0,74,3,0.125181,pizza,canday corn
1,25,1,0.298076,pizza,eggs
2,33,5,0.341611,toco,eggs
3,89,1,0.055910,ice-cream,broccoli
4,23,3,0.353171,toco,broccoli
...,...,...,...,...,...
9995,74,4,0.318207,toco,eggs
9996,40,2,0.149529,ice-cream,canday corn
9997,48,6,0.718335,ice-cream,canday corn
9998,72,2,0.230502,ice-cream,broccoli


In [40]:
def get_data(size= 10_000, seed=None):
    """Generate a demo DataFrame of random sleep and food-preference data.

    Parameters
    ----------
    size : int, default 10_000
        Number of rows to generate.
    seed : int or None, default None
        When given, all values are drawn from a private
        ``np.random.RandomState(seed)``, so the same seed always yields the
        same frame. When ``None``, the global ``np.random`` state is used.

    Returns
    -------
    pd.DataFrame
        ``size`` rows with the columns ``age`` (int in [0, 100)),
        ``time_in_bed`` (int in [0, 9)), ``pct_sleeping`` (float in [0, 1)),
        ``favorite_food`` and ``hate_food`` (one label from a fixed list each).
    """
    # The np.random module and a RandomState instance expose the same
    # randint / rand / choice functions, so either can serve as the source.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Dict order fixes both the column order and the order of the draws.
    return pd.DataFrame(
        {
            'age': rng.randint(0, 100, size),
            'time_in_bed': rng.randint(0, 9, size),
            'pct_sleeping': rng.rand(size),
            'favorite_food': rng.choice(['pizza', 'toco', 'ice-cream'], size),
            'hate_food': rng.choice(['broccoli', 'canday corn', 'eggs'], size),
        }
    )

In [41]:
# Generate a second frame through the helper, using its default size.
df1 = get_data()

In [42]:
# Show the frame produced by get_data().
df1

Unnamed: 0,age,time_in_bed,pct_sleeping,favorite_food,hate_food
0,61,1,0.573620,pizza,broccoli
1,64,3,0.585688,pizza,eggs
2,1,5,0.674363,pizza,canday corn
3,4,3,0.108029,pizza,canday corn
4,12,6,0.466635,ice-cream,broccoli
...,...,...,...,...,...
9995,66,7,0.858438,toco,eggs
9996,56,3,0.422001,toco,eggs
9997,19,5,0.646118,toco,broccoli
9998,30,2,0.779848,pizza,canday corn


## The Problem
Reward calculation:
- If they were in bed for more than 5 hours AND they were sleeping for more than 50%, we give them their favorite food.
- Otherwise we give them their hate food.
- If they are over 90 years old, give them their favorite food regardless.

In [43]:
def reward_calc(row):
    """Pick the reward food for a single person.

    Parameters
    ----------
    row : mapping-like (for example one DataFrame row)
        Must provide ``age``, ``time_in_bed``, ``pct_sleeping``,
        ``favorite_food`` and ``hate_food``.

    Returns
    -------
    The row's ``favorite_food`` when the person is over 90 years old, or was
    in bed for more than 5 hours and slept more than 50%; otherwise the row's
    ``hate_food``.
    """
    # Strictly "over 90", as the problem statement says and as the vectorized
    # implementation checks (age > 90), so every level yields the same reward.
    is_senior = row['age'] > 90

    # Scalar comparisons: use the short-circuiting `and`, not the bitwise `&`.
    slept_well = row['time_in_bed'] > 5 and row['pct_sleeping'] > 0.5

    if is_senior or slept_well:
        return row['favorite_food']
    return row['hate_food']

## Level 1 - Loop

In [None]:
%%timeit
# Level 1: walk the frame row by row with iterrows() and write each reward
# back individually through .loc.
# The data is generated inside the timed cell, so that cost is part of the
# measurement.
df = get_data()
for index, row in df.iterrows():
    df.loc[index, 'reward'] = reward_calc(row)

## Level 2 - Apply

In [None]:
%%timeit
# Level 2: hand reward_calc to DataFrame.apply; axis=1 passes it one row at a
# time and the results are collected into the new 'reward' column.
# The data is generated inside the timed cell, so that cost is part of the
# measurement.
df = get_data()
df['reward'] = df.apply(reward_calc, axis=1)

## Level 3 - Vectorized

In [None]:
%%timeit
df = get_data()
df['reward'] = df['hate_food']
df.loc[ ((df['pct_sleeping'] > 0.5) & (df['time_in_bed'])) |
        (df['age']> 90), 'reward'] = df['favorite_food']

In [None]:
%%timeit
# Time the data generation by itself, so its cost can be told apart from the
# cells above that call get_data() inside their timed body.
df = get_data()

In [None]:
%%timeit
df['reward'] = df['hate_food']
df.loc[ ((df['pct_sleeping'] > 0.5) & (df['time_in_bed'])) |
        (df['age']> 90), 'reward'] = df['favorite_food']

## Plot Difference

In [None]:
# Hand-entered timing summary, one record per approach.
# NOTE(review): the unit is not stated anywhere in the notebook — presumably
# milliseconds as reported by %%timeit; confirm before quoting these numbers.
timing_records = [
    {"type": "loop", "mean": 3580, "std": 48.3},
    {"type": "apply", "mean": 192, "std": 6.34},
    {"type": "vectorized", "mean": 1.36, "std": 0.00896},
]
results = pd.DataFrame(timing_records, columns=["type", "mean", "std"])

In [None]:
# Bar chart of the mean runtime recorded for each approach (linear scale).
ax = results.set_index('type')['mean'].plot(kind='bar', rot=0)
# NOTE(review): the timing unit is not recorded in `results` — confirm it and
# add it to the y-axis label.
ax.set(title='Mean runtime by approach', xlabel='approach', ylabel='mean runtime');

In [None]:
# Same means on a log-scaled y-axis: the values span several orders of
# magnitude, so the smallest bar is barely visible on a linear axis.
ax = results.set_index('type')['mean'].plot(kind='bar', rot=0, logy=True)
# NOTE(review): the timing unit is not recorded in `results` — confirm it and
# add it to the y-axis label.
ax.set(title='Mean runtime by approach (log scale)', xlabel='approach', ylabel='mean runtime');