# Writing Efficient Code in Python

## Part 1

In [1]:
nums = range(0,11)

In [2]:
nums

range(0, 11)

In [3]:
nums_list= list(nums)

In [4]:
print(nums_list)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [5]:
 even_nums = range(2,11,2)

In [6]:
even_nums_list = list(even_nums)
print(even_nums_list)

[2, 4, 6, 8, 10]


In [7]:
#enumerate: Pairs each item of an iterable with its index, yielding (index, item) tuples

In [8]:
#map: Applies a function over an object. THIS IS SUPER IMPORTANT!

In [9]:
#eg:
nums = [1.5, 2.3, 3.4, 4.6, 5.0]
rnd_nums= map(round,nums)

print(list(rnd_nums))

[2, 2, 3, 5, 5]


In [10]:
#map can also be used with a lambda (anonymous function)
nums= [1,2,3,4,5]

sqrd_nums = map(lambda x:x**2, nums)

print(list(sqrd_nums))

[1, 4, 9, 16, 25]


In [11]:
#So a lambda is an Anonymous function

In [12]:
#Map allows us to apply a function ITERATIVELY WITHOUT WRITING A FOR 
#LOOP

In [13]:
#Task 1: Ranges
# Create a range object that goes from 0 to 5
nums = range(6)
print(type(nums))

# Convert nums to a list
nums_list = list(nums)
print(nums_list)

# Create a new list of odd numbers from 1 to 11 by unpacking a range object
nums_list2 = [*range(1,12,2)]
print(nums_list2)

<class 'range'>
[0, 1, 2, 3, 4, 5]
[1, 3, 5, 7, 9, 11]


In [14]:
#Task 2: Use Enumerate to index the following names
names = ['Jerry', 'Kramer', 'Elaine', 'George', 'Newman']

# Rewrite the for loop to use enumerate
indexed_names = []
for i,name in enumerate(names):
    index_name = (i,name)
    indexed_names.append(index_name) 
print(indexed_names)

# Rewrite the above for loop using list comprehension. More efficient than before
indexed_names_comp = [(i,name) for i,name in enumerate(names)]
print(indexed_names_comp)

# Unpack an enumerate object with a starting index of one. MOST EFFICIENT.
indexed_names_unpack = [*enumerate(names, 1)]
print(indexed_names_unpack)

[(0, 'Jerry'), (1, 'Kramer'), (2, 'Elaine'), (3, 'George'), (4, 'Newman')]
[(0, 'Jerry'), (1, 'Kramer'), (2, 'Elaine'), (3, 'George'), (4, 'Newman')]
[(1, 'Jerry'), (2, 'Kramer'), (3, 'Elaine'), (4, 'George'), (5, 'Newman')]


In [15]:
#Task 3: Using map function to manipulate a list

# Use map to apply str.upper to each element in names
names_map  = map(str.upper, names)

# Print the type of the names_map
print(type(names_map))

# Unpack names_map into a list
names_uppercase = [*names_map]

# Print the list created above
print(names_uppercase)

<class 'map'>
['JERRY', 'KRAMER', 'ELAINE', 'GEORGE', 'NEWMAN']


In [16]:
#THE POWER OF NUMPY ARRAYS

In [17]:
# Numpy = Numerical Python
# The most important thing about Numpy is the Numpy Array
# Numpy Arrays provide a FAST, and MEMORY EFFICIENT alternative to lists
# A NumPy array (np.array) is NumPy's counterpart to the Python list

In [18]:
#NumPy arrays are HOMOGENEOUS: every element in an array has the same data type (dtype)
import numpy as np
nums_np_ints = np.array([1,2,3])

In [19]:
nums_np_ints.dtype

dtype('int64')

In [20]:
#NumPy arrays "vectorize" operations: an operation is applied to all elements of the
#array at once, without an explicit loop. This makes array code more efficient.

nums_np = np.array([-2,-1,0,1,2])
nums_np**2

#NOTE: This can NOT be done with lists!!

array([4, 1, 0, 1, 4])

In [21]:
#Lists vs. Arrays (indexing): with 2-dimensional data, NumPy arrays are much more
# convenient to index than nested lists, e.g. nums2_np[0, 1] vs. nums2[0][1].
# For 1-dimensional data, indexing a list and an array works the same way.

In [22]:
#To turn a list into array:
nums2 = [[1,2,3],
        [4,5,6]]

nums2_np = np.array(nums2)

In [23]:
nums2_np

array([[1, 2, 3],
       [4, 5, 6]])

In [24]:
import numpy as np
#Numpy Array Boolean indexing: Arrays make indexing very easy compared to lists
nums= [-2,-1,0,1,2]
nums_np = np.array(nums)

In [25]:
#Find all values in the array greater than Zero
nums_np > 0

array([False, False, False,  True,  True])

In [26]:
nums_np[nums_np > 0]

array([1, 2])

In [27]:
# If you wanted to do this in lists, you would need to write a for loop or use a
# list comprehension

## Part 2

In [28]:
#Examining Runtime in our Code

In [29]:
#Runtime is an important indicator when thinking about efficiency
#It allows us to pick which code is optimal- fastest and efficient.

In [30]:
# Using the magic command %timeit: put %timeit in front of the line of code you want to time.

%timeit rand_nums = np.random.rand(1000)

9.17 µs ± 775 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [31]:
# %timeit runs the code many times and reports the mean and standard deviation of the execution time

In [32]:
%timeit nums = [x for x in range(10)]

528 ns ± 18.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [33]:
#Use Timeit to compare two ways to do the same code

In [34]:
%timeit formal_dict = dict()

82.1 ns ± 0.399 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [35]:
%timeit literal_dict = {}
#So writing a dictionary with {} is much faster!!

31.4 ns ± 1.93 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [36]:
#Code Profiling for Runtime
#Code profiling describes how long, and how often, each part of the code is executed.
#A line profiler reports the runtime of a function line by line.

#Line-by-line analyses

In [37]:
pip install line_profiler

Note: you may need to restart the kernel to use updated packages.


In [38]:
# Code Profiling for Memory Usage
# Looks at how to evaluate the code's memory footprint

In [39]:
import sys
#sys.getsizeof() returns the size of an object in bytes

In [40]:
sys.getsizeof(nums_list)

144

In [41]:
# The memory_profiler package is very similar to the line_profiler package

In [42]:
pip install memory_profiler

Note: you may need to restart the kernel to use updated packages.


In [43]:
#memory_profiler gives a line-by-line breakdown of a function's MEMORY usage instead of
#its runtime. It shows which lines of code take up the most memory, so you know where
#to make your code more efficient.

In [44]:
%mprun -f convert_units convert_units(heroes, hts, wts)

UsageError: Line magic function `%mprun` not found.


## Part 3

In [None]:
#Efficiently Combining, Counting and Iterating Over Datasets

In [None]:
#Combining Objects

In [None]:
names= ["Bulbasaur","Charmander","Squirtle"]
hps= [45,39,44]

In [None]:
combined = []

for i,pokemon in enumerate(names):
    combined.append((pokemon,hps[i]))
    
print(combined)

In [None]:
#For combining data, Zip is more elegant than a loop

combined_zip = zip(names,hps)

In [None]:
combined_zip_list = [*combined_zip]
print(combined_zip_list)
#Each item is a tuple

In [None]:
#Counting Data Sets using counter

In [None]:
from collections import Counter

In [None]:
type_counts = Counter(poke_types)

In [None]:
# Set Theory

In [None]:
#Often we want to compare two objects to find the similarities and differences between
#their contents. For this type of comparison, it's better to leverage set theory.

In [None]:
#The set data type is built into Python

In [None]:
#EXAMPLE: Suppose you have 2 lists of Pokemon:
# Two small rosters to compare; "Squirtle" is the only name present in both.
list_a = ["Bulbasaur", "Charmander", "Squirtle"]
list_b = ["Caterpie", "Pidgey", "Squirtle"]

In [None]:
#We want to compare these lists to see which Pokemon appear in BOTH Lists

In [None]:
#Option 1: Use a for loop
in_common = []

for pokemon_a in list_a:
    for pokemon_b in list_b:
        if pokemon_a == pokemon_b:
            in_common.append(pokemon_a)
            
print(in_common)

In [None]:
#But the above method is inefficient:
set_a = set(list_a)
set_b = set(list_b)

#Convert these two lists into SETS. And now we can do several tests with them!!

#Find which Pokemon they have in Common: Intersection
set_a.intersection(set_b)

In [None]:
#Find Pokemon that exists in one set but not in another: Difference
set_a.difference(set_b)

In [None]:
set_b.difference(set_a)

In [None]:
#Find Pokemon that exist in one of these sets but not both: Symmetric Difference
set_a.symmetric_difference(set_b)

In [None]:
# Combine these two sets: Union
set_b.union(set_a)

In [None]:
#Membership testing: use the `in` operator to check whether a value belongs to a collection.
# This type of test is MUCH faster on a set than on a list or a tuple.

"Bulbasaur" in set_a

In [None]:
#A set is defined as a collection of distinct elements. So we can use a set to collect
#unique items.

In [None]:
#ELIMINATING LOOPS

In [None]:
#Although Using Loops can be a good practice, using them excessively can be inefficient
#and costly. We have certain ways to eliminate or minimize the necessity of using loops in
#our code

In [None]:
#Looping Patterns: for, while, nested. All of them are costly. So try to avoid looping.

In [None]:
# List comprehensions and map() take fewer lines of code and usually run faster than an explicit loop.

In [None]:
# Another powerful technique to eliminate loops is to use the NumPy Package.

In [None]:
#WRITING BETTER LOOPS: Loops can be costly and inefficient, but they are sometimes necessary!

In [None]:
#Some techniques and rules can be considered to make loops more efficient:

In [None]:
# 1. Moving Calculations OUT OF a loop

In [None]:
names = ["Absol", "Aron", "Jynx","Natu","Onix"]
attacks = np.array([130,70,50,50,45])

In [None]:
#We want to print each Pokemon whose attack value is GREATER than the average attack

In [None]:
# NOTE: the mean is deliberately recomputed on every iteration here to demonstrate the
# inefficient pattern; the next cell shows the version with the calculation hoisted out.
for name, attack in zip(names, attacks):
    total_attack_avg = attacks.mean()
    if attack > total_attack_avg:
        print(
            "{}'s attack: {} > average: {}!"
            .format(name, attack, total_attack_avg)
        )

In [None]:
#This is not efficient because the mean is recalculated on every iteration of the loop!!
#You can move the mean calculation OUT OF THE LOOP to make it more efficient.

In [None]:
#Move the mean out of the loop, since this calculation is a one-off. It doesn't change
#between iterations!!
# Loop-invariant: the average does not depend on the current Pokemon, so compute it once.
total_attack_avg = attacks.mean()

for name, attack in zip(names, attacks):
    if attack > total_attack_avg:
        print(
            "{}'s attack: {} > average: {}!"
            .format(name, attack, total_attack_avg)
        )

In [None]:
#Using Holistic Conversions

In [None]:
#EXAMPLE: We want to combine 3 lists using the zip function
names = ["Pikachu", "Squirtle", "Articuno"]
# Real booleans rather than the strings "False"/"True": any non-empty string is truthy,
# so the string "False" would pass an `if status:` check.
legend_status = [False, False, True]
generations = [1, 1, 2]

In [None]:
poke_data = []

for poke_tuple in zip(names,legend_status,generations):
    poke_list = list(poke_tuple) #Remember: Zip returns a collection of Tuples!!
    poke_data.append(poke_list)
    
print(poke_data) #Now you get a List of Lists

In [None]:
#However, converting each tuple into a list WITHIN a loop is NOT EFFICIENT!

In [None]:
#Instead you should collect all of the Poke Tuples together and use the map function
#to convert each tuple into a list
poke_data_tuples = []

for poke_tuple in zip(names,legend_status,generations):
    poke_data_tuples.append(poke_tuple)
    
poke_data2 = [*map(list, poke_data_tuples)] #Move the Tuple conversion OUTSIDE or Below loop

print(poke_data2)

In [None]:
#Converting a tuple to a list outside of the loop is more efficient.

## Part 4

In [None]:
#Intro to Pandas DataFrame Iteration

In [None]:
#The main concept of Pandas is the DataFrame. A tabular Data Structure with labeled
#rows and columns
# It is built on top of the Numpy Array Structure!!

In [None]:
#Calculating Wins% with Two DF columns: Wins and Games played.

def calc_winning_perc(wins, games_played):
    """Return the fraction of games won, rounded to two decimal places.

    Args:
        wins: Number of games won (a scalar, or anything supporting ``/``).
        games_played: Number of games played, same shape as ``wins``.

    Returns:
        ``wins / games_played`` passed through ``np.round`` with 2 decimals.
    """
    return np.round(wins / games_played, decimals=2)

In [None]:
#Calculating Runs Difference
def calc_run_diff(runs_scored, runs_allowed):
    """Return the run differential: runs scored minus runs allowed."""
    return runs_scored - runs_allowed

In [None]:
win_perc = calc_winning_perc(50,100)
print(win_perc)

In [None]:
# .iloc - You can loop over a DataFrame by indexing its rows by position with .iloc

# Positional row access: .iloc[i] returns the i-th row, so looping over the row
# positions visits every row of the DataFrame.
# NOTE(review): baseball_df is not defined in the visible part of this notebook — load it first.
for i in range(len(baseball_df)):
    row = baseball_df.iloc[i]

In [None]:
#Pandas comes with some efficient methods for looping over a data frame:
# .iterrows()

#This is similar to looping with .iloc, but .iterrows() hands you each row directly,
#together with its index label.

#.iterrows() returns each DataFrame row as a tuple of (index, pandas Series) pairs

# Each iteration unpacks the (index, Series) tuple produced by .iterrows().
for i, row in baseball_df.iterrows():
    print(i)          # index label of the current row
    print(row)        # the row's data as a pandas Series
    print(type(row))

In [None]:
# .iterrows() is much faster than indexing with .iloc to iterate over a DataFrame

In [None]:
# USING .itertuples
#This is another iteration method. It is often more efficient.

In [None]:
# .iterrows() returns each row as an (index, pandas Series) tuple.

In [None]:
# .itertuples returns each row as a special data type called a "Named Tuple"
# Example : Pandas(Index=1, Team = "ATL", Year = "2012", W = 95)

In [None]:
# The fields of these tuples are accessible via attribute lookup, e.g. row.Team or row.W
# for the example row shown above.
#.itertuples() stores its data more efficiently.

In [None]:
# Walk the DataFrame as namedtuples, pulling out each row's Index, Year and Wins (W),
# and print them only for the seasons in which the Rangers made the playoffs.
for row in rangers_df.itertuples():
    i, year, wins = row.Index, row.Year, row.W

    # Playoffs flag: 1 means yes, 0 means no — skip the seasons without playoffs.
    if row.Playoffs != 1:
        continue
    print(i, year, wins)

In [None]:
#Pandas Alternatives to Looping

In [None]:
#There are alternatives to looping. One alternative is the DataFrame's .apply() method.
#It acts like the map function we used earlier.
#Since the data is tabular, .apply() needs an axis that says where to apply the function:
#axis=0 applies it to each column, axis=1 applies it to each row.
#.apply() can be used with anonymous lambda functions too.

In [None]:
#EXAMPLE:
# Apply calc_run_diff row by row (axis=1): each row x supplies its "RS" and "RA" values.
# The result is stored under the same name that is read back below; the original
# assigned to a differently spelled variable, which raised a NameError.
run_diffs_apply = baseball_df.apply(
    lambda x: calc_run_diff(x["RS"], x["RA"]),
    axis=1,
)

baseball_df["RD"] = run_diffs_apply

In [None]:
# OPTIMAL PANDAS ITERATING

In [None]:
#Pandas is a library that is built on NumPy. So you can take advantage of the broadcasting
#functionality of its arrays!

In [None]:
#Vectorize - when an operation works on an entire dataset at once.
#Just like NumPy, pandas is built to vectorize operations as well.

In [None]:
#With vectorizing you no longer have to iterate across rows. You can vectorize to run the
#operation ALL AT ONCE.

run_diffs_np = baseball_df["RS"].values - baseball_df["RA"].values
baseball_df["RD"] = run_diffs_np
print(baseball_df)