|ID|Project Name|
|---|---|
|31823|Weather Analysis Project|

In [1]:
import numpy as np
import pandas as pd

In [8]:
def get_data(size = 10_100, seed = None):
    """Build a synthetic sleep / food-preference dataset filled with random values.

    Parameters
    ----------
    size : int, default 10_100
        Number of rows to generate.
    seed : int or None, default None
        When given, values are drawn from a private ``np.random.RandomState(seed)``
        so the same seed always produces the same frame. When ``None`` the
        global ``np.random`` state is used.

    Returns
    -------
    pandas.DataFrame
        One row per person with the columns ``age`` (int in [0, 100)),
        ``time_to_bed`` (int in [0, 9)), ``pct_sleeping`` (float in [0, 1)),
        ``favorite_food`` and ``fcking_food`` (strings picked from fixed menus).
    """
    # RandomState offers the same randint / rand / choice calls as the
    # np.random module, so either object can serve as the generator.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Columns are drawn in the order listed; the dict keeps that column order.
    return pd.DataFrame({
        "age": rng.randint(0, 100, size),
        "time_to_bed": rng.randint(0, 9, size),
        "pct_sleeping": rng.rand(size),
        "favorite_food": rng.choice(["fruit_juice","fit_salad","fish_chips","mihlama"], size),
        "fcking_food": rng.choice(["candy","sweety_snack","popcorn"], size),
    })

In [9]:
# Seed NumPy's global random state first: get_data draws from it, so without a
# fixed seed every Restart & Run All would produce a different dataset.
SEED = 42
np.random.seed(SEED)
my_dataset = get_data(10_500)

In [10]:
# Summary statistics; describe() reports only the numeric columns by default,
# so the two food columns do not appear in this table.
my_dataset.describe()

Unnamed: 0,age,time_to_bed,pct_sleeping
count,10500.0,10500.0,10500.0
mean,49.280476,4.068952,0.501917
std,29.073788,2.566834,0.288012
min,0.0,0.0,1.3e-05
25%,24.0,2.0,0.25495
50%,49.0,4.0,0.501861
75%,75.0,6.0,0.751613
max,99.0,8.0,0.999989


In [11]:
# Preview the first five rows to see every column, including the food columns.
my_dataset.head()

Unnamed: 0,age,time_to_bed,pct_sleeping,favorite_food,fcking_food
0,35,3,0.178707,fruit_juice,candy
1,18,6,0.14243,fruit_juice,sweety_snack
2,40,8,0.329629,fish_chips,popcorn
3,34,5,0.916922,mihlama,candy
4,41,6,0.821113,fish_chips,sweety_snack


## The Problem
* If they were in bed for more than 5 hours AND were sleeping more than 50% of the time, we give them their `favorite_food`.
* Otherwise, we give them their `fcking_food`.
* Exception: anyone aged 90 or older always gets their `favorite_food`, regardless of how they slept.

In [12]:
def reward_calculate(row, senior_age = 90, min_hours = 5, min_pct = 0.5):
    """Choose the food reward for one person.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or a dict)
        Must provide the scalar entries ``age``, ``time_to_bed``,
        ``pct_sleeping``, ``favorite_food`` and ``fcking_food``.
    senior_age : number, default 90
        Anyone this age or older always receives the favorite food.
    min_hours : number, default 5
        ``time_to_bed`` must be strictly greater than this to qualify.
    min_pct : float, default 0.5
        ``pct_sleeping`` must be strictly greater than this to qualify.

    Returns
    -------
    The value of ``row["favorite_food"]`` when the person qualifies,
    otherwise the value of ``row["fcking_food"]``.
    """
    # Seniors are rewarded unconditionally, so they are checked first.
    if row["age"] >= senior_age:
        return row["favorite_food"]
    # These are scalar comparisons, so logical `and` is the right operator
    # (bitwise `&` is meant for element-wise work on whole columns).
    if row["time_to_bed"] > min_hours and row["pct_sleeping"] > min_pct:
        return row["favorite_food"]
    return row["fcking_food"]

* <b>Level 1 - Loop</b>

In [14]:
%%timeit
# Baseline approach: visit the frame one row at a time and write each reward
# back with its own .loc assignment (the "reward" column is created on first write).
for index, row in my_dataset.iterrows():
    my_dataset.loc[index,"reward"] = reward_calculate(row)

1.04 s ± 6.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


* <b>Level 2 - Apply</b>

In [16]:
%%timeit
# Row-wise apply: axis = 1 makes pandas call reward_calculate once per row, and
# the resulting Series is assigned to the "reward" column in a single step.
my_dataset["reward"] = my_dataset.apply(reward_calculate, axis = 1)

143 ms ± 1.67 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


* <b>Level 3 - Vectorized</b>

In [17]:
%%timeit
# Vectorized approach: start by giving everyone the fallback food ...
my_dataset["reward"] = my_dataset["fcking_food"]
# ... then overwrite, in one masked assignment, the rows that qualify: aged 90 or
# older, OR (more than 5 hours in bed AND pct_sleeping above 0.5). Element-wise
# | and & are required here because the operands are whole columns.
my_dataset.loc[(my_dataset["age"]>=90) | ((my_dataset["time_to_bed"]>5) & (my_dataset["pct_sleeping"]>0.5)),"reward"] = my_dataset["favorite_food"]

1.06 ms ± 6.52 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


* <b>Results on Plot</b>

In [None]:

# Benchmark summary, one row per approach, transcribed from the %%timeit outputs
# recorded in the cells above. Both "mean" and "std" are in microseconds so the
# three rows can be compared (and plotted) on a single scale.
results = pd.DataFrame(
    [
        ["loop", 1_040_000, 6_340],     # 1.04 s  ± 6.34 ms
        ["apply", 143_000, 1_670],      # 143 ms  ± 1.67 ms
        ["vectorized", 1_060, 6.52],    # 1.06 ms ± 6.52 µs
    ],
    columns= ["type","mean","std"]
)