In [1]:
# This is Lecture03 - Exercise 2
# of the "Data Science" class at Technische Hochschule Rosenheim

<img src="figures/pokemon-sign.svg" alt="PokemonSign" align="right" width="150"/>

# Pokemon Dataset

In this exercise you will continue to analyse the Pokemon dataset.

### Exercises 3

* 3a) load the dataset

* 3b) print (in one line) the min, mean, max and median of the hitpoints

* 3c) compute a one-dim array containing the sum of Attack and Defense for each Pokemon

* 3d) compute a list 'all' containing the sum of all 6 stats for each Pokemon (using a list comprehension)

* 3e) use boolean indexing and your 'all' list to find the name of the Pokemon with the highest sum of all 6 stats

In [2]:
## ---------- SOLUTION 3a

In [3]:
import numpy as np

In [4]:
# Load the three arrays that make up the Pokemon dataset. They are used in
# parallel below: the same row index addresses the same Pokemon in each array.
ids = np.load('data/pokemon_ids.npy')
# allow_pickle=True lets NumPy load pickled object arrays
# (presumably the names are stored with dtype=object — TODO confirm).
# NOTE(review): unpickling can execute arbitrary code, so only load trusted files this way.
names = np.load('data/pokemon_names.npy', allow_pickle=True)
stats = np.load('data/pokemon_stats.npy')

In [5]:
## ---------- SOLUTION 3b

In [6]:
# Column 0 of the stats array holds the hit points (HP) of every Pokemon.
hp = stats[:, 0]
# Compute the four summary values first, then report them in a single line.
hp_min, hp_mean, hp_max, hp_median = hp.min(), hp.mean(), hp.max(), np.median(hp)
print(f'HP: min={hp_min}, mean={hp_mean}, max={hp_max}, median={hp_median}')

HP: min=1, mean=69.25875, max=255, median=65.0


In [7]:
## ---------- SOLUTION 3c

In [8]:
# Three equivalent ways to add Attack (column 1) and Defense (column 2) for
# every Pokemon; each assignment overwrites the previous one with the same values.
ad = np.add(stats[:, 1], stats[:, 2])   # call the np.add function explicitly
ad = stats[:, 1] + stats[:, 2]          # the same addition via the + operator
ad = stats[:, 1:3].sum(axis=1)          # sum columns 1 and 2 along axis 1 (one value per row)
# 'ad' keeps one entry per Pokemon; only the printed preview is limited to 10 values.
print(ad[:10])

[ 98 125 165 223  95 122 162 241 182 113]


In [9]:
## ---------- SOLUTION 3d

In [10]:
# Iterating over the 2-dim stats array yields one row (all stats of one Pokemon)
# per step; the comprehension collects the total of each row in a plain list.
all_list = [pokemon_stats.sum() for pokemon_stats in stats]
print(all_list[:10])   # preview of the first 10 totals

[318, 405, 525, 625, 309, 405, 534, 634, 634, 314]


In [11]:
# Vectorised alternative to the list comprehension: let NumPy sum along axis 1
# (i.e. across the stat columns of each row). The comprehension above was only
# used to practise comprehensions.
all_nparray = np.sum(stats, axis=1)
print(all_nparray[:10])   # preview of the first 10 totals

[318 405 525 625 309 405 534 634 634 314]


In [12]:
## ---------- SOLUTION 3e

In [13]:
# Boolean mask that is True for every Pokemon whose stat total equals the maximum;
# several Pokemon can share the maximum, so more than one name may be printed.
has_max_total = all_nparray == all_nparray.max()
print(names[has_max_total])

['MewtwoMega Mewtwo X' 'MewtwoMega Mewtwo Y' 'RayquazaMega Rayquaza']


### Exercises 4

* 4a) find the names of all Pokemon where the sum of defense and special defense is at least twice the sum of attack and special attack (let's call them the 'strong defenders')

* 4b) create a 1-dim array 'att_or_def' stating for each Pokemon if it is a 'Defender' or an 'Attacker' depending on the condition from the previous exercise, using the 'where' method.

* 4c) create a dict where the keys are the names of the Pokemon and the values are 'Defender' or 'Attacker' (hint: use a 'dict' comprehension and the *zip* function)

In [14]:
## ---------- SOLUTION 4a

In [15]:
# Column layout used here: 1=Attack, 2=Defense, 3=Special Attack, 4=Special Defense.
# Three equivalent ways to build the boolean 'strong defender' mask
# (Defense + Sp. Defense >= 2 * (Attack + Sp. Attack)); each assignment
# overwrites the previous one with the same values.

# using the np.sum function on the selected columns
strong_defenders = np.sum(stats[:, (2, 4)], axis=1) >= 2 * np.sum(stats[:, (1, 3)], axis=1)
# using the ndarray.sum method (this is still NumPy's sum, not Python's built-in sum)
strong_defenders = stats[:, (2, 4)].sum(1) >= 2 * stats[:, (1, 3)].sum(1)
# using plain column addition instead of sum
strong_defenders = stats[:, 2] + stats[:, 4] >= 2 * (stats[:, 1] + stats[:, 3])

print(names[strong_defenders])

['Onix' 'Chansey' 'Magikarp' 'Togepi' 'Marill' 'Shuckle' 'Smeargle'
 'Azurill' 'Nosepass' 'Feebas' 'Duskull' 'Dusclops' 'Wynaut' 'Regirock'
 'Regice' 'Registeel' 'DeoxysDefense Forme' 'Shieldon' 'Bastiodon'
 'Bronzor' 'Happiny' 'Mantyke' 'Probopass' 'Ferroseed'
 'AegislashShield Forme' 'Carbink']


In [16]:
## ---------- SOLUTION 4b

In [17]:
# Element-wise selection: 'Defender' where the strong_defenders mask is True,
# 'Attacker' everywhere else. The result has one label per Pokemon.
att_or_def = np.where(strong_defenders, 'Defender', 'Attacker')

In [18]:
## ---------- SOLUTION 4c

In [19]:
# Two equivalent ways to map every Pokemon name to its 'Defender'/'Attacker' label;
# zip pairs the i-th name with the i-th label.
ad_dict = {name: role for name, role in zip(names, att_or_def)}   # dict comprehension
ad_dict = dict(zip(names, att_or_def))                            # dict constructor on the zipped pairs

In [20]:
# A dict cannot be sliced directly, so materialise its (name, label) pairs
# as a list and show only the first 10 of them.
first_entries = list(ad_dict.items())[:10]
print(first_entries)

[('Bulbasaur', 'Attacker'), ('Ivysaur', 'Attacker'), ('Venusaur', 'Attacker'), ('VenusaurMega Venusaur', 'Attacker'), ('Charmander', 'Attacker'), ('Charmeleon', 'Attacker'), ('Charizard', 'Attacker'), ('CharizardMega Charizard X', 'Attacker'), ('CharizardMega Charizard Y', 'Attacker'), ('Squirtle', 'Attacker')]


### Exercises 5

At the start of a new game, each player gets a random set of Pokemon to start with. Each Pokemon has a 1.5% chance of being in this list.

* 5a) How many Pokemon do you expect in this starting list?
* 5b) Generate such a 'my_pokemon' set (make sure you think about and pick a suitable data type!) containing the names of the Pokemon. How many does your set contain?  
Hint: **np.random.uniform** may come in handy.
* 5c) Compute the total hit points your starter Pokemon can sustain (i.e. the sum of the hitpoints of your starter Pokemon)

In [21]:
## ---------- SOLUTION 5a

In [22]:
# Expected number of starters = chance per Pokemon * number of eligible Pokemon.
# Only base Pokemon are eligible: stone-evolved forms are temporary and share
# the id of their base Pokemon, so we count the distinct ids rather than
# using len(names).
n_base_pokemon = np.unique(ids).size
0.015 * n_base_pokemon

10.815

In [23]:
## ---------- SOLUTION 5b

In [24]:
# Seed NumPy's global random number generator so that the random draws in the
# following cells are reproducible (as long as the cells are run in order).
np.random.seed(1)

In [25]:
# We represent the selection as a boolean NumPy array (one flag per dataset row),
# because it can be used directly for boolean indexing. A regular Python list
# or the built-in 'set' class would be possible alternatives.
# Draw one uniform random number per row; a row is selected when its draw
# is at most 0.015, i.e. with a chance of 1.5%.
draws = np.random.uniform(size=len(names))
starter_pokemon_b = draws <= 0.015

In [26]:
# show the indices of all potentially chosen starter pokemon:
# apply the boolean mask to the row numbers 0..len(ids)-1 and list what remains
row_numbers = np.arange(len(ids))
idx = list(row_numbers[starter_pokemon_b])
print(idx)

[2, 98, 149, 196, 250, 441, 443, 487, 538, 545, 563, 569, 677, 690, 739, 744, 765]


In [27]:
# Names of all randomly drawn rows; this may still contain alternative forms
# (e.g. Mega evolutions), which are filtered out in the next step.
print(names[starter_pokemon_b])

['Venusaur' 'Cloyster' 'Omanyte' 'AmpharosMega Ampharos' 'Phanpy' 'Starly'
 'Staraptor' 'Mime Jr.' 'Mesprit' 'GiratinaOrigin Forme' 'Patrat'
 'Liepard' 'Shelmet' 'Vullaby' 'Florges' 'Furfrou' 'Heliolisk']


In [28]:
# filter out all "form" pokemon:
# a form shares its id with the row directly before it, so a drawn row is kept
# only if it is the very first row (index 0 has no predecessor; ids[-1] would
# wrap around to the last row) or if its id differs from the previous row's id.
# NOTE(review): this assumes that forms directly follow their base Pokemon in the data.
candidate_idx = np.arange(0, len(ids))[starter_pokemon_b]
starter_pokemon_idx = [idx for idx in candidate_idx if idx == 0 or ids[idx - 1] != ids[idx]]
starter_pokemon = names[starter_pokemon_idx]
print(starter_pokemon)

['Venusaur' 'Cloyster' 'Omanyte' 'Phanpy' 'Starly' 'Staraptor' 'Mime Jr.'
 'Mesprit' 'Patrat' 'Liepard' 'Shelmet' 'Vullaby' 'Florges' 'Furfrou'
 'Heliolisk']


In [29]:
# Number of Pokemon in the generated starter set (answer to 5b).
len(starter_pokemon)

15

In [30]:
## ---------- SOLUTION 5c

In [31]:
# Combined health = sum of the hit points (stats column 0) of all starter Pokemon.
# Three equivalent variants follow; each overwrites 'ch' with the same value.
hit_points = stats[:, 0]

# 1) select the rows via the integer indices of the starters
ch = hit_points[starter_pokemon_idx].sum()

# 2) build a boolean mask from the names: True where a name belongs to the starters
is_starter = np.array([name in starter_pokemon for name in names])
ch = hit_points[is_starter].sum()

# 3) same as 2), but passing the plain list of booleans directly as the index,
#    without converting it to an np.array first (variant 2 may be easier to read)
ch = hit_points[[name in starter_pokemon for name in names]].sum()

print(ch)

924


---

In [32]:
# alternative SOLUTION 5b & 5c using return_index parameter of np.unique

In [33]:
# With return_index=True, np.unique returns (unique values, indices of their first
# occurrences); element [1] is therefore the row index of the first entry per id.
uix = np.unique(ids, return_index=True)[1]

In [34]:
# Restrict all three arrays to the potential starters (ps), i.e. to the first
# row of every id.
# ps_ids equals np.unique(ids, return_index=True)[0] (for this dataset: arange(1, 722)).
ps_ids, ps_names, ps_stats = ids[uix], names[uix], stats[uix]

In [35]:
# Re-seed the global random number generator so the result is deterministic.
np.random.seed(42)
# Compute the starter mask: one uniform draw per potential starter, selected
# with a chance of 1.5%.
# NOTE: this reuses the name starter_pokemon_b, replacing the mask computed
# for the first solution further up.
ps_draws = np.random.uniform(size=len(ps_ids))
starter_pokemon_b = ps_draws <= 0.015

In [36]:
# Names of the drawn starters; ps_names holds one row per id, so no further
# filtering of forms is done here.
print(ps_names[starter_pokemon_b])

['Tentacruel' 'Magikarp' 'Dunsparce' 'Snubbull' 'Swablu' 'Cranidos'
 'Lumineon' 'Glaceon' 'Tirtouga']


In [37]:
# Combined hit points (column 0) of the drawn starters: pick rows with the
# boolean mask and column 0 in one indexing step, then sum.
ps_stats[starter_pokemon_b, 0].sum()

560

---