In [17]:
import numpy as np
import pandas as pd
import sys
from hero_funcs import convert_units
from collections import Counter
from itertools import combinations

# Examining Runtime

## Using %timeit

In [53]:
%timeit rand_nums = np.random.rand(1000)

7.76 µs ± 313 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## Setting number of runs -r and/or number of loops -n

In [54]:
%timeit -r2 -n10 rand_nums = np.random.rand(1000)

The slowest run took 14.36 times longer than the fastest. This could mean that an intermediate result is being cached.
60.5 µs ± 52.6 µs per loop (mean ± std. dev. of 2 runs, 10 loops each)


## Using %timeit in cell magic mode

In [55]:
%%timeit
nums = []
for i in range(10):
    nums.append(i)

804 ns ± 29.1 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


## Saving runtime to a variable

In [56]:
time = %timeit -o rand_nums = np.random.rand(1000)

7.17 µs ± 105 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [57]:
time

<TimeitResult : 7.17 µs ± 105 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)>

## Examining time for each run

In [58]:
time.timings

[7.377241149997644e-06,
 7.017250720000448e-06,
 7.102453120000974e-06,
 7.145428250000805e-06,
 7.155723800001397e-06,
 7.234244999999646e-06,
 7.13273332999961e-06]

In [59]:
time.best

7.017250720000448e-06

In [60]:
time.worst

7.377241149997644e-06

In [61]:
formal_list_creation = %timeit -o formal_list = list()
literal_list_creation = %timeit -o literal_list = []

72.4 ns ± 1.33 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
23.4 ns ± 1.29 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [62]:
# Gap between the two mean runtimes; .average is in seconds, so scale by 10**9 to get nanoseconds.
diff = (formal_list_creation.average - literal_list_creation.average) * (10**9)
diff # value is in nanoseconds

49.02899418573594

# Code profiling for runtime

In [63]:
heroes = ['Batman','Superman','Wonder Woman']
hts = np.array([188.0, 191.0, 183.0])
wtd = np.array([95.0, 101.0, 74.0])

In [64]:
def convert_units(heroes_lst, heights, weights):
    """Scale each hero's height and weight and key the results by hero name.

    Every height is multiplied by 0.39370 and every weight by 2.20462
    (presumably cm -> inches and kg -> pounds; the units are not stated here).

    Args:
        heroes_lst: iterable of hero names, used as the dictionary keys.
        heights: iterable of heights, positionally aligned with heroes_lst.
        weights: iterable of weights, positionally aligned with heroes_lst.

    Returns:
        dict mapping each hero to a (scaled_height, scaled_weight) tuple.

    Raises:
        IndexError: if heights or weights holds fewer items than heroes_lst.
    """
    scaled_heights = [height * 0.39370 for height in heights]
    scaled_weights = [weight * 2.20462 for weight in weights]

    # Look values up by position so a too-short input still fails loudly.
    return {
        hero: (scaled_heights[idx], scaled_weights[idx])
        for idx, hero in enumerate(heroes_lst)
    }

In [65]:
print(convert_units(heroes, hts, wtd))

{'Batman': (74.01559999999999, 209.4389), 'Superman': (75.19669999999999, 222.66661999999997), 'Wonder Woman': (72.0471, 163.14188)}


In [66]:
%timeit convert_units(heroes, hts, wtd)

3.54 µs ± 181 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## Using line_profiler on a function

In [67]:
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [73]:
%lprun -f convert_units convert_units(heroes, hts, wtd)

# Code Profiling for Memory Usage

## Quick and Dirty Approach

In [69]:
num_list = [*range(1000)]
sys.getsizeof(num_list)

8056

In [70]:
num_array  = np.array(range(1000))
sys.getsizeof(num_array)

8112

## Using memory_profiler on a function

In [76]:
%load_ext memory_profiler

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler


In [77]:
# NOTE(review): memory_profiler documents that %mprun only works on functions
# defined in a file on disk and imported. The `def convert_units` cell in this
# notebook shadows the `from hero_funcs import convert_units` import, so this
# call may be profiling the interactive definition -- confirm that the
# hero_funcs import is re-run before this cell.
%mprun -f convert_units convert_units(heroes, hts, wtd)




# Efficient combining, counting and iterating

## Combining objects

In [2]:
names = ['Bulbasaur','Charmander','Squirtle']
hps = [45, 39, 44]

In [5]:
grouped = zip(names, hps)
print(list(grouped))

[('Bulbasaur', 45), ('Charmander', 39), ('Squirtle', 44)]


## Counting with loop

In [6]:
baseball = pd.read_csv('baseball_stats.csv')

In [7]:
# Tally how many rows belong to each league using a plain loop.
counter_dict = {}

for league_name in baseball['League']:
    # get() falls back to 0 for a league not seen yet, so one statement
    # handles both the first occurrence and every later one.
    counter_dict[league_name] = counter_dict.get(league_name, 0) + 1

counter_dict

{'NL': 616, 'AL': 616}

In [10]:
league_count = Counter(baseball['League'])
league_count

Counter({'NL': 616, 'AL': 616})

## Combinations with loops to find pairs

In [13]:
team_subset = baseball['Team'][0:3]
league_subset = baseball['League'][0:3]

0    ARI
1    ATL
2    BAL
Name: Team, dtype: object

In [16]:
# Build every unordered pair of distinct teams with nested loops.
combos = []

for x in team_subset:
    for y in team_subset:
        # A team is never paired with itself.
        if x == y:
            continue
        # (x, y) and (y, x) count as the same pair. Use the logical `and`:
        # the bitwise `&` only worked because both operands were bools, and
        # it always evaluated the second membership scan.
        if (x, y) not in combos and (y, x) not in combos:
            combos.append((x, y))

combos

[('ARI', 'ATL'), ('ARI', 'BAL'), ('ATL', 'BAL')]

In [20]:
combos_obj = combinations(team_subset, 2)
print([*combos_obj])

[('ARI', 'ATL'), ('ARI', 'BAL'), ('ATL', 'BAL')]


# Set Theory

In [21]:
list_a = ['Bulbasaur','Charmander','Squirtle']
lisst_b = ['Caterpie','Pidgey','Squirtle']

In [22]:
set_a = set(list_a)
set_b = set(lisst_b)

## Finding common items between sets

In [23]:
set_a.intersection(set_b)

{'Squirtle'}

## Finding different items between sets

In [24]:
set_a.difference(set_b) #exist in set_a but not in set_b

{'Bulbasaur', 'Charmander'}

In [26]:
set_b.difference(set_a) # exist in set_b but not in set_a

{'Caterpie', 'Pidgey'}

## Finding symmetric difference

In [28]:
set_a.symmetric_difference(set_b) # items found in exactly one of the two sets; items common to both (here 'Squirtle') are excluded

{'Bulbasaur', 'Caterpie', 'Charmander', 'Pidgey'}

## Combining sets

In [30]:
set_a.union(set_b) # duplicates are gathered once

{'Bulbasaur', 'Caterpie', 'Charmander', 'Pidgey', 'Squirtle'}

## Membership testing with sets

In [31]:
print('Caterpie' in set_a)

False


In [32]:
print('Caterpie' in set_b)

True


In [34]:
nl_league = baseball[baseball['League'] == 'NL']
nl_league

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415
1,ATL,NL,2012,700,600,94,0.320,0.389,0.247,1,4.0,5.0,162,0.306,0.378
4,CHC,NL,2012,613,759,61,0.302,0.378,0.240,0,,,162,0.335,0.424
6,CIN,NL,2012,669,588,97,0.315,0.411,0.251,1,2.0,4.0,162,0.305,0.390
8,COL,NL,2012,758,890,64,0.330,0.436,0.274,0,,,162,0.357,0.470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,NYM,NL,1962,617,948,40,0.318,0.361,0.240,0,,,161,,
1227,PHI,NL,1962,705,759,81,0.330,0.390,0.260,0,,,161,,
1228,PIT,NL,1962,706,626,93,0.321,0.394,0.268,0,,,161,,
1229,SFG,NL,1962,878,690,103,0.341,0.441,0.278,1,1.0,2.0,165,,


In [35]:
al_league = baseball[baseball['League'] == 'AL']
al_league

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403
3,BOS,AL,2012,734,806,69,0.315,0.415,0.260,0,,,162,0.331,0.428
5,CHW,AL,2012,748,676,85,0.318,0.422,0.255,0,,,162,0.319,0.405
7,CLE,AL,2012,667,845,68,0.324,0.381,0.251,0,,,162,0.336,0.430
9,DET,AL,2012,726,670,88,0.335,0.422,0.268,1,6.0,2.0,162,0.314,0.402
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1220,KCA,AL,1962,745,837,72,0.332,0.386,0.263,0,,,162,,
1221,LAA,AL,1962,718,706,86,0.325,0.380,0.250,0,,,162,,
1223,MIN,AL,1962,798,713,91,0.338,0.412,0.260,0,,,163,,
1226,NYY,AL,1962,817,680,96,0.337,0.426,0.267,1,2.0,1.0,162,,


In [36]:
nl_teams = set(nl_league['Team'])
al_teams = set(al_league['Team'])

In [38]:
'BAL' in al_teams

True

In [37]:
%timeit 'BAL' in al_teams

36.2 ns ± 1.34 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [39]:
al_teams_list = list(al_league['Team'])

In [40]:
%timeit 'BAL' in al_teams_list

41.1 ns ± 2.32 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [41]:
set(baseball['League'])

{'AL', 'NL'}

# Eliminating Loops

## Use collections, list comprehension and built-in functions

In [57]:

total = []
stats = [
    [1,2,3],
    [4,5,6],
    [7,8,9]
]

for row in stats:
    total.append(sum(row))
total


[6, 15, 24]

In [58]:
total

[6, 15, 24]

In [59]:
stats = [
    [1,2,3],
    [4,5,6],
    [7,8,9]
]

In [60]:
[sum(row) for row in stats]

[6, 15, 24]

In [62]:
total = [*map(sum, stats)]
total

[6, 15, 24]

## Eliminating loops with NumPy

In [63]:
stats_array = np.array(stats)

In [69]:
avg_stats = stats_array.mean(axis=1)
avg_stats

array([2., 5., 8.])

# Writing better loops

## General Rules to follow
#### 1. Understand what is being done in each loop
#### 2. Move one-time calculations outside the loop
#### 3. Anything that is done once should be outside the loop.
#### 4. Use holistic conversion outside the loop

In [73]:
%%timeit
# NOTE(review): print() runs on every timing loop, so it floods the cell output
# and its I/O cost is part of the measured time.
names = ['Bulbasaur','Charmander','Squirtle']
hps = np.array([120, 139, 44])

for name, attack in zip(names, hps):
    avg_attack = hps.mean() # inefficient: the mean does not change between iterations, yet it is recomputed on every pass

    if attack > avg_attack:
        print(f"{name} has an attack ({attack}) greater than the average of {avg_attack}")

Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) gr

In [74]:
%%timeit
# NOTE(review): print() runs on every timing loop, so it floods the cell output
# and its I/O cost is part of the measured time.
names = ['Bulbasaur','Charmander','Squirtle']
hps = np.array([120, 139, 44])
avg_attack = hps.mean() # one-time calculation hoisted out of the loop: computed once, reused on every iteration

for name, attack in zip(names, hps):
    if attack > avg_attack:
        print(f"{name} has an attack ({attack}) greater than the average of {avg_attack}")

Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) greater than the average of 101.0
Bulbasaur has an attack (120) greater than the average of 101.0
Charmander has an attack (139) gr

## Using holistic conversions

In [78]:
team, league, year = baseball['Team'], baseball['League'], baseball['Year']

# Collect one (team, league, year) tuple per row.
tuples = []
for baseball_tuple in zip(team, league, year):
    tuples.append(baseball_tuple)

# Holistic conversion: turn every tuple into a list in a single pass after the
# loop has finished, instead of calling list() inside the loop body.
tuples_data = list(map(list, tuples))
tuples_data

[['ARI', 'NL', 2012],
 ['ATL', 'NL', 2012],
 ['BAL', 'AL', 2012],
 ['BOS', 'AL', 2012],
 ['CHC', 'NL', 2012],
 ['CHW', 'AL', 2012],
 ['CIN', 'NL', 2012],
 ['CLE', 'AL', 2012],
 ['COL', 'NL', 2012],
 ['DET', 'AL', 2012],
 ['HOU', 'NL', 2012],
 ['KCR', 'AL', 2012],
 ['LAA', 'AL', 2012],
 ['LAD', 'NL', 2012],
 ['MIA', 'NL', 2012],
 ['MIL', 'NL', 2012],
 ['MIN', 'AL', 2012],
 ['NYM', 'NL', 2012],
 ['NYY', 'AL', 2012],
 ['OAK', 'AL', 2012],
 ['PHI', 'NL', 2012],
 ['PIT', 'NL', 2012],
 ['SDP', 'NL', 2012],
 ['SEA', 'AL', 2012],
 ['SFG', 'NL', 2012],
 ['STL', 'NL', 2012],
 ['TBR', 'AL', 2012],
 ['TEX', 'AL', 2012],
 ['TOR', 'AL', 2012],
 ['WSN', 'NL', 2012],
 ['ARI', 'NL', 2011],
 ['ATL', 'NL', 2011],
 ['BAL', 'AL', 2011],
 ['BOS', 'AL', 2011],
 ['CHC', 'NL', 2011],
 ['CHW', 'AL', 2011],
 ['CIN', 'NL', 2011],
 ['CLE', 'AL', 2011],
 ['COL', 'NL', 2011],
 ['DET', 'AL', 2011],
 ['FLA', 'NL', 2011],
 ['HOU', 'NL', 2011],
 ['KCR', 'AL', 2011],
 ['LAA', 'AL', 2011],
 ['LAD', 'NL', 2011],
 ['MIL', '

# Intro to Pandas DataFrame iteration

In [79]:
baseball.head()

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424


## iterating with iterrows()

In [82]:
# Compute each season's win percentage (wins / games played) row by row.
win_pcts = []

# iterrows() yields (index, row) pairs; the index is not needed here.
for _, row in baseball.iterrows():
    win_pcts.append(row['W'] / row['G'])

# Store the result as a new 'WP' column on the DataFrame.
baseball['WP'] = win_pcts
baseball.head()

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,WP
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,0.5
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378,0.580247
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,0.574074
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428,0.425926
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424,0.376543


## iterating with itertuples() (better option than iterrows())

In [83]:
team_wins_df = baseball[['Team','Year','W']]
team_wins_df

Unnamed: 0,Team,Year,W
0,ARI,2012,81
1,ATL,2012,94
2,BAL,2012,93
3,BOS,2012,69
4,CHC,2012,61
...,...,...,...
1227,PHI,1962,81
1228,PIT,1962,93
1229,SFG,1962,103
1230,STL,1962,84


In [86]:
# iterrows() yields one (index, row) tuple per DataFrame row.
for row_tuple in team_wins_df.iterrows():
    print(row_tuple[1]) # element 0 is the row's index label; element 1 is the row's data as a Series

Team     ARI
Year    2012
W         81
Name: 0, dtype: object
Team     ATL
Year    2012
W         94
Name: 1, dtype: object
Team     BAL
Year    2012
W         93
Name: 2, dtype: object
Team     BOS
Year    2012
W         69
Name: 3, dtype: object
Team     CHC
Year    2012
W         61
Name: 4, dtype: object
Team     CHW
Year    2012
W         85
Name: 5, dtype: object
Team     CIN
Year    2012
W         97
Name: 6, dtype: object
Team     CLE
Year    2012
W         68
Name: 7, dtype: object
Team     COL
Year    2012
W         64
Name: 8, dtype: object
Team     DET
Year    2012
W         88
Name: 9, dtype: object
Team     HOU
Year    2012
W         55
Name: 10, dtype: object
Team     KCR
Year    2012
W         72
Name: 11, dtype: object
Team     LAA
Year    2012
W         89
Name: 12, dtype: object
Team     LAD
Year    2012
W         86
Name: 13, dtype: object
Team     MIA
Year    2012
W         69
Name: 14, dtype: object
Team     MIL
Year    2012
W         83
Name: 15, dtype: object
Te

In [89]:
for row_namedtuple in team_wins_df.itertuples(): # yields each DataFrame row as a namedtuple
    print(row_namedtuple.Team) # namedtuple fields are read with attribute (dot) access

ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
HOU
KCR
LAA
LAD
MIA
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBR
TEX
TOR
WSN
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
FLA
HOU
KCR
LAA
LAD
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBR
TEX
TOR
WSN
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
FLA
HOU
KCR
LAA
LAD
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBR
TEX
TOR
WSN
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
FLA
HOU
KCR
LAA
LAD
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBR
TEX
TOR
WSN
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
FLA
HOU
KCR
LAA
LAD
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBR
TEX
TOR
WSN
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
FLA
HOU
KCR
LAA
LAD
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBD
TEX
TOR
WSN
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
FLA
HOU
KCR
LAA
LAD
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBD
TEX
TOR
WSN
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
FLA
HOU
KCR
LAA
LAD
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBD
TEX
TOR
WSN
ANA
ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL


## Pandas alternative to looping