In [22]:
import pandas as pd

# the .get() method on a series:

- it helps to sort the data first: a sorted index allows pandas to optimize lookups more easily
- small data sets won't show a difference in speed, but sorting is really important for larger ones


In [23]:
# read_csv's `squeeze` keyword was deprecated in pandas 1.4 and removed in 2.0;
# DataFrame.squeeze("columns") performs the same one-column-to-Series conversion.
# sort_index() returns a new, sorted Series instead of mutating one in place.
pokemon = (
    pd.read_csv("pokemon.csv", index_col="Pokemon")
    .squeeze("columns")
    .sort_index()
)
pokemon.head(3)

Pokemon
Abomasnow      Grass
Abra         Psychic
Absol           Dark
Name: Type, dtype: object

In [24]:
# NOTE(review): the index holds string labels, so the integer 10 is resolved by
# position here. Recent pandas releases deprecate this positional fallback for
# integer keys on a non-integer index; prefer pokemon.iloc[10] for positional
# access. TODO confirm the behavior against the installed pandas version.
pokemon.get(10)

'Dragon'

In [25]:
pokemon.get("Moltres")

'Fire'

In [26]:
pokemon.get([3, 7, 2])

Pokemon
Accelgor       Bug
Aipom       Normal
Absol         Dark
Name: Type, dtype: object

In [35]:
pokemon.get(["Wooper", "Gengar"])

Pokemon
Wooper    Water
Gengar    Ghost
Name: Type, dtype: object

In [36]:
pokemon.get("Mewthree")
pokemon.get(100000)
pokemon.get(["Pikachu", "Pikatwo"])
pokemon.get([1, 5 , 1000])
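# if we give .get() a key that doesn't exist, it returns the default value and does NOT raise an error
# (for a list of keys, a single missing key makes the whole call return the default)
# the default argument to .get() is None, which is why these calls display nothing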

In [37]:
pokemon.get(key = "Digimon", default = "This is not a pokemon")

'This is not a pokemon'

In [38]:
pokemon.get(key = "Charmander", default = "This is not a pokemon")

'Fire'

In [40]:
pokemon.get(["Pikachu", "Digimon"], default = "This is not a pokemon")

'This is not a pokemon'

# Math Methods on Series Objects:

In [42]:
# `squeeze=True` is no longer accepted by read_csv (deprecated in pandas 1.4,
# removed in 2.0); squeeze the single column into a Series explicitly instead.
google = pd.read_csv("google_stock_price.csv").squeeze("columns")
google.head(3)

0    50.12
1    54.10
2    54.65
Name: Stock Price, dtype: float64

In [43]:
google.count()
# count returns a count of the number of valid values within a series
# different than len() because count EXCLUDES null values

3012

In [44]:
google.sum()

1006942.0

In [45]:
google.mean()

334.31009296148744

In [46]:
google.sum() / google.count()

334.3100929614874

In [47]:
google.std()
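# standard deviation measures how spread out the values are around the mean:
# the square root of the average squared distance from the mean
# (pandas uses the sample formula, ddof=1, by default)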

173.18720477113106

In [48]:
google.min()

49.95

In [49]:
google.max()

782.22

In [51]:
google.median()

283.315

In [52]:
google.mode()
# returns a brand new series holding the most frequent value(s); here there is a single mode, so it has one item

0    291.21
dtype: float64

In [53]:
google.describe()
# the percents are percentiles

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

In [59]:
new_google = google.get([10, 25])

In [60]:
new_google.describe()

count     2.000000
mean     55.280000
std       6.477098
min      50.700000
25%      52.990000
50%      55.280000
75%      57.570000
max      59.860000
Name: Stock Price, dtype: float64

# The .idxmax() and .idxmin() methods:

- return the index labels of the greatest or smallest values in a series

In [62]:
# Reload the prices as a Series; .squeeze("columns") replaces the read_csv
# `squeeze=True` keyword, which was deprecated in pandas 1.4 and removed in 2.0.
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

In [63]:
google.max()

782.22

In [64]:
google.min()

49.95

In [68]:
google.idxmax()

3011

In [72]:
google[3011]

782.22

In [73]:
google.idxmin()

11

In [74]:
google[11]

49.95

In [76]:
google[google.idxmax()]
# ^ does both steps in one, but essentially does the same as the .max() method

782.22

# The .value_counts() method:

- counts how many times each unique value occurs in a series

In [78]:
# Reload in file order (unsorted); .squeeze("columns") replaces the read_csv
# `squeeze=True` keyword, which was deprecated in pandas 1.4 and removed in 2.0.
pokemon = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")

In [84]:
pokemon.head(7)

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Charizard      Fire
Squirtle      Water
Name: Type, dtype: object

In [81]:
pokemon.value_counts()

Water       105
Normal       93
Grass        66
Bug          63
Fire         47
Psychic      47
Rock         41
Electric     36
Ground       30
Poison       28
Dark         28
Fighting     25
Dragon       24
Ghost        23
Ice          23
Steel        22
Fairy        17
Flying        3
Name: Type, dtype: int64

In [85]:
pokemon.value_counts().sum()

721

In [86]:
pokemon.count()

721

In [88]:
pokemon.value_counts(ascending = True)

Flying        3
Fairy        17
Steel        22
Ghost        23
Ice          23
Dragon       24
Fighting     25
Poison       28
Dark         28
Ground       30
Electric     36
Rock         41
Psychic      47
Fire         47
Bug          63
Grass        66
Normal       93
Water       105
Name: Type, dtype: int64

# The .apply() method:

- calls a function on every value that occurs in a series

In [91]:
# `squeeze=True` is no longer accepted by read_csv (deprecated in pandas 1.4,
# removed in 2.0); squeeze the single column into a Series explicitly instead.
google = pd.read_csv("google_stock_price.csv").squeeze("columns")
google.head(6)

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
5    53.90
Name: Stock Price, dtype: float64

In [95]:
def classify_performance(number):
    """Label a stock price as "Poor", "Satisfactory", or "Incredible!".

    Prices below 300 are "Poor"; prices from 300 up to (but not including)
    650 are "Satisfactory"; anything else is "Incredible!".
    """
    if number < 300:
        return "Poor"
    if number < 650:
        # Reaching this point already means the price is not below 300.
        return "Satisfactory"
    return "Incredible!"

In [97]:
google.apply(classify_performance).tail()

3007    Incredible!
3008    Incredible!
3009    Incredible!
3010    Incredible!
3011    Incredible!
Name: Stock Price, dtype: object

In [99]:
# anonymous functions let us define the function within .apply()
# need to write lambda first
google.head(7)

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
5    53.90
6    53.02
Name: Stock Price, dtype: float64

In [101]:
google.apply(lambda stock_price : stock_price + 5)
# stock_price represents each value

0        55.12
1        59.10
2        59.65
3        57.38
4        57.95
         ...  
3007    777.88
3008    776.07
3009    778.18
3010    776.61
3011    787.22
Name: Stock Price, Length: 3012, dtype: float64

# The .map() method:

- maps the values of a series to another collection of data
- ideal for combining datasets from two different sources / data types
- for each value in the original series, looks it up in the second collection and returns the matching value, keeping the original series' index


In [102]:
# Load only the "Pokemon" column and squeeze it into a Series; the read_csv
# `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0.
pokemon_names = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
pokemon_names.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

In [109]:
# Series of types indexed by Pokemon name; .squeeze("columns") replaces the
# read_csv `squeeze=True` keyword (deprecated in pandas 1.4, removed in 2.0).
pokemon_types = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon_types.head(3)

Pokemon
Bulbasaur    Grass
Ivysaur      Grass
Venusaur     Grass
Name: Type, dtype: object

In [110]:
pokemon_names.map(pokemon_types)

0        Grass
1        Grass
2        Grass
3         Fire
4         Fire
        ...   
716       Dark
717     Dragon
718       Rock
719    Psychic
720       Fire
Name: Pokemon, Length: 721, dtype: object

In [111]:
# .squeeze("columns") replaces the read_csv `squeeze=True` keyword, which was
# deprecated in pandas 1.4 and removed in 2.0.
pokemon_names = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
pokemon_types = (
    pd.read_csv("pokemon.csv", index_col="Pokemon")
    .squeeze("columns")
    .to_dict()
)
# pokemon_names is still a Series, while .to_dict() turns pokemon_types into a
# plain dict, so now we have two different data types

In [112]:
pokemon_names.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

In [115]:
pokemon_types

{'Bulbasaur': 'Grass',
 'Ivysaur': 'Grass',
 'Venusaur': 'Grass',
 'Charmander': 'Fire',
 'Charmeleon': 'Fire',
 'Charizard': 'Fire',
 'Squirtle': 'Water',
 'Wartortle': 'Water',
 'Blastoise': 'Water',
 'Caterpie': 'Bug',
 'Metapod': 'Bug',
 'Butterfree': 'Bug',
 'Weedle': 'Bug',
 'Kakuna': 'Bug',
 'Beedrill': 'Bug',
 'Pidgey': 'Normal',
 'Pidgeotto': 'Normal',
 'Pidgeot': 'Normal',
 'Rattata': 'Normal',
 'Raticate': 'Normal',
 'Spearow': 'Normal',
 'Fearow': 'Normal',
 'Ekans': 'Poison',
 'Arbok': 'Poison',
 'Pikachu': 'Electric',
 'Raichu': 'Electric',
 'Sandshrew': 'Ground',
 'Sandslash': 'Ground',
 'Nidoran': 'Poison',
 'Nidorina': 'Poison',
 'Nidoqueen': 'Poison',
 'Nidoran♂': 'Poison',
 'Nidorino': 'Poison',
 'Nidoking': 'Poison',
 'Clefairy': 'Fairy',
 'Clefable': 'Fairy',
 'Vulpix': 'Fire',
 'Ninetales': 'Fire',
 'Jigglypuff': 'Normal',
 'Wigglytuff': 'Normal',
 'Zubat': 'Poison',
 'Golbat': 'Poison',
 'Oddish': 'Grass',
 'Gloom': 'Grass',
 'Vileplume': 'Grass',
 'Paras': 'Bug'

In [116]:
pokemon_names.map(pokemon_types)
# since pokemon_types is a dictionary, .map() looks through the keys
# and returns the values

0        Grass
1        Grass
2        Grass
3         Fire
4         Fire
        ...   
716       Dark
717     Dragon
718       Rock
719    Psychic
720       Fire
Name: Pokemon, Length: 721, dtype: object