# Series

- Series are like a single column of data
- Series combine the best parts of lists and dictionaries: ordered values plus label-based lookup

In [2]:
import pandas as pd

In [5]:
# Build a Series from a plain Python list; pandas supplies a default integer index.
ice_cream = ["Chocolate", "Vanilla", "Strawberry", "Rum Raisin"]

ice_cream_series = pd.Series(ice_cream)
# An assignment produces no cell output, so end on the bare name to display the Series.
ice_cream_series

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [7]:
# Integer input: the resulting Series gets an integer dtype.
lottery_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(data=lottery_numbers)

0     4
1     8
2    15
3    16
4    23
5    42
dtype: int64

## Series from Dicts

In [12]:
# Fish -> colour mapping; the dict keys end up as the index labels of the Series.
sushi = dict([
    ("Salmon", "Orange"),
    ("Tuna", "Red"),
    ("Eel", "Brown"),
])
pd.Series(data=sushi)

Salmon    Orange
Tuna         Red
Eel        Brown
dtype: object

- A pandas Series keeps its entries in insertion order (regular Python dicts also preserve insertion order since Python 3.7)
- The dict keys become the index labels of the Series, so a default integer index is not
generated; values are accessed through those labels

## Series Methods Intro

In [15]:
# Small float Series used to demonstrate the aggregation methods.
prices = pd.Series(data=[2.99, 4.45, 1.36])
prices

0    2.99
1    4.45
2    1.36
dtype: float64

In [16]:
# Add up every value in the Series.
prices.sum()

np.float64(8.8)

In [17]:
# Multiply all the values in the Series together.
prices.product()

np.float64(18.095480000000006)

In [18]:
# Average (arithmetic mean) of the values
prices.mean()

np.float64(2.9333333333333336)

In [19]:
# Standard deviation; pandas computes the sample standard deviation (ddof=1) by default
prices.std()

np.float64(1.5457791994115246)

## Series Attributes Intro

In [28]:
# Sample strings for the attribute demos; note that "Smart" appears twice.
adjectives = pd.Series(
    data=["Smart", "Handsome", "Charming", "Brilliant", "Humble", "Smart"],
)

In [29]:
# Number of elements in the Series (an attribute, so no parentheses).
adjectives.size

6

In [30]:
# True only when no value repeats; "Smart" occurs twice, so this is False.
adjectives.is_unique

False

In [35]:
# A pandas Series is composed of several objects; here we look at the two
# main ones: the values and the index.

(adjectives.values, # The underlying data as a NumPy array (not the original list)
adjectives.index) # The Index object that holds the labels

(array(['Smart', 'Handsome', 'Charming', 'Brilliant', 'Humble', 'Smart'],
       dtype=object),
 RangeIndex(start=0, stop=6, step=1))

In [40]:
type(adjectives.values) # The Series stores its values in a NumPy ndarray

numpy.ndarray

In [43]:
# The default integer index is a RangeIndex.
type(adjectives.index)

pandas.core.indexes.range.RangeIndex

## Series Params

In [45]:
# Values and custom index labels for the Series; index labels may repeat ("Monday").
fruits = [
    "Apple", "Orange", "Plumb",
    "Grape", "Blueberry", "Watermelon",
]
weekdays = [
    "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Monday",
]

# Positional form: data first, index second. The result is discarded because
# only the last expression of a cell is displayed.
pd.Series(fruits, weekdays)
# Keyword form of the same call; this is the one that gets displayed.
pd.Series(index=weekdays, data=fruits)

Monday            Apple
Tuesday          Orange
Wednesday         Plumb
Thursday          Grape
Friday        Blueberry
Monday       Watermelon
dtype: object

## Load Datasets

In [25]:
# Load only the "Name" column, then squeeze the one-column DataFrame into a Series.
pokemon = (
    pd.read_csv("pokemon.csv", usecols=["Name"])
    .squeeze("columns")
)
pokemon

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [4]:
# Load only the "Price" column, then squeeze the one-column DataFrame into a Series.
google = (
    pd.read_csv("google_stock_price.csv", usecols=["Price"])
    .squeeze("columns")
)
google

0         2.490664
1         2.515820
2         2.758411
3         2.770615
4         2.614201
           ...    
4788    132.080002
4789    132.998001
4790    135.570007
4791    137.050003
4792    138.429993
Name: Price, Length: 4793, dtype: float64

## The Head and Tail Method

In [5]:
# First ten and last ten rows, returned together as a tuple of two Series.
(pokemon.head(10), pokemon.tail(10))

(0     Bulbasaur
 1       Ivysaur
 2      Venusaur
 3    Charmander
 4    Charmeleon
 5     Charizard
 6      Squirtle
 7     Wartortle
 8     Blastoise
 9      Caterpie
 Name: Name, dtype: object,
 1000        Wo-Chien
 1001       Chien-Pao
 1002         Ting-Lu
 1003          Chi-Yu
 1004    Roaring Moon
 1005    Iron Valiant
 1006        Koraidon
 1007        Miraidon
 1008    Walking Wake
 1009     Iron Leaves
 Name: Name, dtype: object)

## Pandas and Python Built-Ins

In [64]:
# A Series works with Python built-ins; each result is printed on its own line.
print(
    *[
        type(pokemon),
        list(pokemon)[:5],
        dict(pokemon)[0],  # dict keys are the Series' integer index labels
        sorted(pokemon)[:5],
        max(pokemon),
        min(pokemon),
        0 in pokemon,  # `in` on a Series tests the index labels, not the values
        0 in pokemon.index,
        "Abra" in pokemon.values,  # go through .values to test the data itself
    ],
    sep=";\n",
)

<class 'pandas.core.series.Series'>;
['Bulbasaur', 'Ivysaur', 'Venusaur', 'Charmander', 'Charmeleon'];
Bulbasaur;
['Abomasnow', 'Abra', 'Absol', 'Accelgor', 'Aegislash'];
Zygarde;
Abomasnow;
True;
True;
True


## The sort_values Method

In [75]:
# Five lowest prices first, then the five highest.
print(
    google.sort_values().head(),  # ascending order is the default
    google.sort_values(ascending=False).head(),
    sep=";\n",
)

10    2.470490
0     2.490664
13    2.509095
11    2.514326
12    2.515820
Name: Price, dtype: float64;
4395    151.863495
4345    151.000000
4346    150.141754
4336    150.000000
4341    150.000000
Name: Price, dtype: float64


## The sort_index Method

In [7]:
# Use the "Name" column as the index; squeezing the remaining single column
# yields a Series keyed by name.
pokemon_full_series = (
    pd.read_csv("pokemon.csv", index_col="Name")
    .squeeze("columns")
)
pokemon_full_series.head()

Name
Bulbasaur     Grass, Poison
Ivysaur       Grass, Poison
Venusaur      Grass, Poison
Charmander             Fire
Charmeleon             Fire
Name: Type, dtype: object

In [82]:
# Order the rows by index label (here: alphabetically by name) rather than by value.
pokemon_full_series.sort_index()

Name
Abomasnow        Grass, Ice
Abra                Psychic
Absol                  Dark
Accelgor                Bug
Aegislash      Steel, Ghost
                  ...      
Zoroark                Dark
Zorua                  Dark
Zubat        Poison, Flying
Zweilous       Dark, Dragon
Zygarde      Dragon, Ground
Name: Type, Length: 1010, dtype: object

## Extract Series Value by Index Position

In [83]:
# Reminder of the integer-indexed Series before accessing it by position.
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Name, dtype: object

In [92]:
# Positional access through the iloc indexer.
print(
    pokemon.iloc,                   # the indexer object itself
    pokemon.iloc[0],                # single position -> one value
    pokemon.iloc[200:205],          # slice of positions -> Series
    pokemon.iloc[[100, 200, 300]],  # list of positions -> Series
    sep=";\n",
)

<pandas.core.indexing._iLocIndexer object at 0x7bab895433e0>;
Bulbasaur;
200         Unown
201     Wobbuffet
202     Girafarig
203        Pineco
204    Forretress
Name: Name, dtype: object;
100    Electrode
200        Unown
300     Delcatty
Name: Name, dtype: object


## Extract Series Value by Index Label

In [94]:
# Look up a single value by its index label.
pokemon_full_series.loc["Bulbasaur"]

'Grass, Poison'

In [95]:
# A list of labels returns a Series with the rows in the requested order.
pokemon_full_series.loc[["Mewtwo", "Mew", "Lugia"]]

Name
Mewtwo            Psychic
Mew               Psychic
Lugia     Psychic, Flying
Name: Type, dtype: object

## The Get Method On a Series
- `get` allows safe access to a value in a Series by key: a missing key returns `None` (or the value passed as `default`) instead of raising an error
- It should not be used for positional access; use `iloc` for index positions

In [99]:
# An existing label returns its value; a missing label returns None instead of raising.
print(pokemon_full_series.get("Moltres"), pokemon_full_series.get("Augumon"))

Fire, Flying None


In [102]:
# The lookup for this list of labels fails, so the second argument (the default) is returned.
pokemon_full_series.get(["Augumon", "Zapados"], "default in multi")

'default in multi'

## Overwrite a Series Value

In [117]:
# Work on a copy so the overwrites do not touch the original `pokemon` Series.
pokemon_2 = pokemon.copy()
pokemon_2.head()

0     Bulbasaur
1       Hamomon
2     HitmonHam
3    Charmander
4    Hamasauras
Name: Name, dtype: object

In [105]:
# Overwrite the value at position 0.
pokemon_2.iloc[0] = "Augumon"

In [108]:
# Check the first rows after the single-position overwrite.
pokemon_2.head()

0       Augumon
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Name, dtype: object

In [112]:
# Overwrite several positions at once; the new values are matched to the positions in order.
pokemon_2.iloc[[1,2,4]] = ("Hamomon", "HitmonHam", "Hamasauras")

In [120]:
# Check the first rows after the multi-position overwrite.
pokemon_2.head()

0     Bulbasaur
1       Hamomon
2     HitmonHam
3    Charmander
4    Hamasauras
Name: Name, dtype: object

In [121]:
# Rebind pokemon_2 to a copy of the name-indexed Series for the label-based overwrite.
pokemon_2 = pokemon_full_series.copy()
pokemon_2.head()

Name
Bulbasaur     Grass, Poison
Ivysaur       Grass, Poison
Venusaur      Grass, Poison
Charmander             Fire
Charmeleon             Fire
Name: Type, dtype: object

In [122]:
# Overwrite the value stored under the "Bulbasaur" label.
pokemon_2.loc["Bulbasaur"] = "Fighting, Flying"

In [124]:
# Show only the first row to confirm the label-based overwrite.
pokemon_2.head(1)

Name
Bulbasaur    Fighting, Flying
Name: Type, dtype: object

## The copy Method
- `copy` creates a dupe of the original object
- Some operations within pandas will return a view and not a copy
- Changes to a view do change the original object
  

In [135]:
# Keep the one-column DataFrame around so the effect of mutating the squeezed Series is visible.
pokemon_df = pd.read_csv("pokemon.csv", usecols=["Name"])
# squeeze on its own can hand back a view of pokemon_df; chaining .copy() onto it
# would give an independent Series that is safe to mutate.
# NOTE(review): view semantics depend on the pandas version; with Copy-on-Write
# enabled (the default from pandas 3.0) the parent is not modified. Confirm for your version.
poke_series = pokemon_df.squeeze(axis="columns")

In [136]:
# poke_series is a view onto pokemon_df, so writing through the view
# also modifies the original DataFrame.
poke_series.iloc[0] = "Nougatmon"

In [137]:
# Position 0 of the Series now holds the overwritten value.
poke_series.head()

0     Nougatmon
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Name, dtype: object

In [138]:
# Inspect the parent DataFrame to see whether the write made through poke_series reached it.
pokemon_df.head()

Unnamed: 0,Name
0,Nougatmon
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon


## Math Methods on Series Objects
- `count` returns the number of values in the Series, excluding missing values
- `sum` adds together values
- `product` multiplies all values together
- `mean` gives the average
- `std` standard deviation
- `max` the maximum value in the series
- `min` min value
- `median` returns the middle value of the sorted Series (the average of the two middle values when the length is even)
- `mode` returns the value that occurs most
- `describe` gives summary of mathematical values

In [6]:
# Summary statistics in one call: count, mean, std, min, quartiles and max.
google.describe()

count    4793.000000
mean       40.211377
std        37.274753
min         2.470490
25%        12.767395
50%        26.327717
75%        56.311001
max       151.863495
Name: Price, dtype: float64

## Broadcasting
- Apply a mathematical op to every value

In [8]:
# Method form of broadcasting: adds 10 to every value and returns a new Series.
google.add(10)

0        12.490664
1        12.515820
2        12.758411
3        12.770615
4        12.614201
           ...    
4788    142.080002
4789    142.998001
4790    145.570007
4791    147.050003
4792    148.429993
Name: Price, Length: 4793, dtype: float64

In [10]:
google + 10 # operator form: also adds 10 to every element

0        12.490664
1        12.515820
2        12.758411
3        12.770615
4        12.614201
           ...    
4788    142.080002
4789    142.998001
4790    145.570007
4791    147.050003
4792    148.429993
Name: Price, Length: 4793, dtype: float64

In [11]:
# Broadcasting works for other operators too: every value is doubled.
google * 2

0         4.981328
1         5.031640
2         5.516822
3         5.541230
4         5.228402
           ...    
4788    264.160004
4789    265.996002
4790    271.140014
4791    274.100006
4792    276.859986
Name: Price, Length: 4793, dtype: float64

## The value_counts Method
- returns the number of times each unique value occurs in the `Series`
- The `normalize` param returns relative frequencies (proportions between 0 and 1) instead of counts; multiply by 100 for percentages

In [14]:
# Count how many times each distinct value occurs, most frequent first.
pokemon_full_series.value_counts()

Type
Water               74
Normal              74
Grass               46
Psychic             39
Fire                36
                    ..
Rock, Electric       1
Dark, Ground         1
Dragon, Dark         1
Fairy, Fighting      1
Fighting, Dragon     1
Name: count, Length: 200, dtype: int64

In [19]:
# normalize=True yields proportions; scaling by 100 turns them into percentages.
pokemon_full_series.value_counts(normalize=True) * 100

Type
Water               7.326733
Normal              7.326733
Grass               4.554455
Psychic             3.861386
Fire                3.564356
                      ...   
Rock, Electric      0.099010
Dark, Ground        0.099010
Dragon, Dark        0.099010
Fairy, Fighting     0.099010
Fighting, Dragon    0.099010
Name: proportion, Length: 200, dtype: float64

## The apply Method
- The `apply` method calls a function on every `Series` value and returns the results as a new `Series` (like `map` in functional programming)
  

In [26]:
# Peek at the names before transforming them.
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Name, dtype: object

In [27]:
# Call the function once per value and collect the results into a new Series.
pokemon.apply(lambda name: name.lower())

0          bulbasaur
1            ivysaur
2           venusaur
3         charmander
4         charmeleon
            ...     
1005    iron valiant
1006        koraidon
1007        miraidon
1008    walking wake
1009     iron leaves
Name: Name, Length: 1010, dtype: object

## The map Method
- Only works on series aka 1d data types
- can apply a function or you can put in a dict or series for how to substitute values
- used to replace every value with some translated value

In [30]:
# Lookup table from type string to attack power; each key must match a Series value exactly.
attack_powers = dict([
    ("Grass", 10),
    ("Fire", 15),
    ("Water", 15),
    ("Fairy, Fighting", 20),
    ("Grass, Psychic", 50),
])

In [32]:
pokemon_full_series.map(attack_powers) # Values with no matching key in the dict become NaN (Not a Number)

Name
Bulbasaur        NaN
Ivysaur          NaN
Venusaur         NaN
Charmander      15.0
Charmeleon      15.0
                ... 
Iron Valiant    20.0
Koraidon         NaN
Miraidon         NaN
Walking Wake     NaN
Iron Leaves     50.0
Name: Type, Length: 1010, dtype: float64

In [33]:
# Same lookup table as a Series: the dict keys become its index labels.
attack_powers_series = pd.Series(attack_powers)

In [34]:
# Mapping with a Series: its index labels act as the lookup keys, giving the same result as the dict.
pokemon_full_series.map(attack_powers_series)

Name
Bulbasaur        NaN
Ivysaur          NaN
Venusaur         NaN
Charmander      15.0
Charmeleon      15.0
                ... 
Iron Valiant    20.0
Koraidon         NaN
Miraidon         NaN
Walking Wake     NaN
Iron Leaves     50.0
Name: Type, Length: 1010, dtype: float64