# Efficient Code Writing

## Run time / Memory Check to make the most efficient code (Pythonic way)

In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


## Use Built-in Functions: range, enumerate, map(function, object), numpy array broadcasting

In [3]:
letters = ['a', 'b', 'c', 'd' ]
indexed_letters = enumerate(letters)
indexed_letters_list = list(indexed_letters)
print(indexed_letters_list)


indexed_letters2 = enumerate(letters, start=5)
indexed_letters2_list = list(indexed_letters2)
print(indexed_letters2_list)


[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd')]
[(5, 'a'), (6, 'b'), (7, 'c'), (8, 'd')]


In [6]:
nums = [1.5, 2.3, 3.4, 4.6, 5.0]
rnd_nums = map(round, nums)
print(list(rnd_nums))

nums = [1, 2, 3, 4, 5]
sqrd_nums = map(lambda x: x ** 2, nums)
print(list(sqrd_nums))

[2, 2, 3, 5, 5]
[1, 4, 9, 16, 25]


In [16]:
import numpy as np
nums = np.array(range(5))
print (nums)

#list comprehension
sqrd_num=[num**2 for num in nums]
print(sqrd_num)

# numpy array broadcasting
print(nums ** 2)

[0 1 2 3 4]
[0, 1, 4, 9, 16]
[ 0  1  4  9 16]


## Examine Runtime

In [18]:
%timeit rand_nums = np.random.rand(1000)


31.7 µs ± 199 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [19]:
# Set number of runs to 2 (-r2)# Set number of loops to 10 (-n10)
%timeit -r2 -n10 rand_nums = np.random.rand(1000)


88.9 µs ± 45.4 µs per loop (mean ± std. dev. of 2 runs, 10 loops each)


In [27]:
%%timeit # this has to be the first line
#multiple lines of code
nums=[]
for x in range(10):
    nums.append(x)

2.22 µs ± 51.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [24]:
times = %timeit -o rand_nums = np.random.rand(1000)


34.1 µs ± 3.38 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [25]:
print(times)

34.1 µs ± 3.38 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## Code Profiling

In [None]:
#pip install line_profiler

In [64]:
heroes = ['Batman', 'Superman', 'Wonder Woman']
hts = np.array([188.0, 191.0, 183.0])
wts = np.array([ 95.0, 101.0,  74.0])


In [40]:
list(enumerate(heroes))

[(0, 'Batman'), (1, 'Superman'), (2, 'Wonder Woman')]

In [65]:
for index, value in enumerate(heroes):
    print(f'{index}: {value}')

0: Batman
1: Superman
2: Wonder Woman


In [74]:
def convert_units(heroes, heights, weights):    
    new_hts = [ht * 0.39370for ht in heights]    
    new_wts = [wt * 2.20462for wt in weights]    
    
    hero_data = {}
    
    for i, hero in enumerate(heroes):
        hero_data[hero] = (new_hts[i], new_wts[i])
    return(hero_data)       

convert_units(heroes, hts, wts)

{'Batman': (74.01559999999999, 209.4389),
 'Superman': (75.19669999999999, 222.66661999999997),
 'Wonder Woman': (72.0471, 163.14188)}

In [75]:
%load_ext line_profiler
%lprun -f convert_units convert_units(heroes, hts, wts)


Timer unit: 1e-07 s

Total time: 0.0001145 s
File: <ipython-input-74-229e0ac7c7cf>
Function: convert_units at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def convert_units(heroes, heights, weights):    
     2         1        517.0    517.0     45.2      new_hts = [ht * 0.39370for ht in heights]    
     3         1        222.0    222.0     19.4      new_wts = [wt * 2.20462for wt in weights]    
     4                                               
     5         1         40.0     40.0      3.5      hero_data = {}
     6                                               
     7         4        193.0     48.2     16.9      for i, hero in enumerate(heroes):
     8         3        142.0     47.3     12.4          hero_data[hero] = (new_hts[i], new_wts[i])
     9         1         31.0     31.0      2.7      return(hero_data)

## Examine Memory Usage
- save function in .py and then import function to use memory profiler

In [76]:
import sys
nums_list = [*range(1000)]
sys.getsizeof(nums_list)

9112

In [None]:
#pip install memory_profiler

In [None]:
#pip install nbconvert

In [83]:
#%load_ext memory_profiler

In [None]:
#from xxx import xxx function
#%mprun -f convert_units convert_units(heroes, hts, wts)

## Combining

In [90]:
names= range(10)
primary_types=range(100,1000,100)

names_type1 = [*zip(names, primary_types)]

print(*names_type1[:5], sep='\n')

(0, 100)
(1, 200)
(2, 300)
(3, 400)
(4, 500)


## Itertools

In [93]:
from itertools import combinations

In [94]:
name=['kukhwa','A','B','C','D']

In [101]:
combo=combinations(name,2)
print(type(combo),'\n')
print(*combinations(name,2),sep='\n')

<class 'itertools.combinations'> 

('kukhwa', 'A')
('kukhwa', 'B')
('kukhwa', 'C')
('kukhwa', 'D')
('A', 'B')
('A', 'C')
('A', 'D')
('B', 'C')
('B', 'D')
('C', 'D')


## Membership check using Set ( intersection, difference, symmetric_difference)

In [102]:
name_set=set(name)

In [103]:
%timeit 'kukhwa' in name
%timeit 'kukhwa' in name_set

84.2 ns ± 0.815 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
97 ns ± 6.86 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [104]:
print('kukhwa' in name_set)

True


- Use numpy array broadcasting + list comprehension rather than loop (example only)

In [None]:
%%timeit
hp_avg=hps.mean()
hp_std=hps.std()
z_score=(hps-hp_avg)/hp_std
poke_zscores=[*zip(names, hps, z_score)]
highest_hp_pokemon2 = [(name, hp, zscore) for name,hp,zscore in poke_zscores if zscore > 2]

## Panda DataFrame iterrows() itertuples()
- use iterrow instead of iloc[i]
- example only

In [None]:
# used tuple insteady of for i,row in pit_df.iterrows()

for row_tuple in pit_df.iterrows():
    print(row_tuple)
    print(type(row_tuple))

In [None]:
# Loop over the DataFrame and print each row's Index, Year and Wins (W)
for row in rangers_df.itertuples():
  i = row.Index
  year = row.Year
  wins = row.W
  
  # Check if rangers made Playoffs (1 means yes; 0 means no)
  if row.Playoffs == 1:
    print(i, year, wins)

.apply()

In [None]:
def text_playoffs(num_playoffs): 
    if num_playoffs == 1:
        return 'Yes'
    else:
        return 'No' 
# Convert numeric playoffs to text
textual_playoffs = rays_df.apply(lambda row: text_playoffs(row['Playoffs']), axis=1)
# Or
#textual_playoffs = rays_df['Playoffs'].apply(text_playoffs)
print(textual_playoffs)

## most efficient to use np.array  (or built-in functions)

In [None]:
df
c=df[a].values-df[b].values
df[c]=c

c=myfunction(df[a].values,df[b].values)
df[c]=c