In [1]:
import pandas as pd

# Series
Series should be a column in tabular world

## Create series from list

### String series

In [2]:
ice_creame = ["Choclate", "vanilla", "Strawberry", "Run Raisin"]

pd.Series(data=ice_creame)

0      Choclate
1       vanilla
2    Strawberry
3    Run Raisin
dtype: object

### Integer series

In [3]:
lottery = [4, 8, 9, 10]
pd.Series(data=lottery)

0     4
1     8
2     9
3    10
dtype: int64

### Boolean series

In [4]:
registrations = [True, True, True]
pd.Series(data=registrations, index=["gal", "ori", "miryam"])

gal       True
ori       True
miryam    True
dtype: bool

## Create series from dict

Series can accessed by index or label from dict (aka index)

In [5]:
sushi = {
    "Salmon": "Orange",
    "Tuna": "red",
    "Eel": "Brown"
}
pd.Series(data=sushi)

Salmon    Orange
Tuna         red
Eel        Brown
dtype: object

## Series methods

In [6]:
prices = [1, 2, 3, 4]
prices_series = pd.Series(data=prices)
print(f"Prices sum {prices_series.sum()}")
print(f"Prices product {prices_series.product()}")
print(f"Prices average {prices_series.mean()}")

Prices sum 10
Prices product 24
Prices average 2.5


## Series attributes

In [7]:
print(f"Series dtype {prices_series.dtype}")
print(f"Series size {prices_series.size}")
print(f"Series element is unique {prices_series.is_unique}")
print(f"Series underling numpy {type(prices_series.values)} {prices_series.values}")
print(f"Series indexes {type(prices_series.index)} {prices_series.index}")

Series dtype int64
Series size 4
Series element is unique True
Series underling numpy <class 'numpy.ndarray'> [1 2 3 4]
Series indexes <class 'pandas.core.indexes.range.RangeIndex'> RangeIndex(start=0, stop=4, step=1)


## Paramaters and argumets
Parameter - The name we give to an expected input

Argument - The concrete value that we provide to a parameter

In [8]:
fruits = ["Apple", "Orange", "Plum", "Grape"]
weekdays = ["Sunday", "Monday", "Friday", "Friday"]
pd.Series(data=fruits, index=weekdays)

Sunday     Apple
Monday    Orange
Friday      Plum
Friday     Grape
dtype: object

## Read Series from CSV

In [9]:
!ls

Data - Multiple Worksheets.xlsx      employees.csv
Data - Single Worksheet.xlsx         foods.csv
Restaurant - Customers.csv           fortune1000.csv
Restaurant - Foods.csv               google_stock_price.csv
Restaurant - Week 1 Sales.csv        jamesbond.csv
Restaurant - Week 1 Satisfaction.csv nba.csv
Restaurant - Week 2 Sales.csv        pokemon.csv
Series.ipynb                         quarters.csv
bigmac.csv                           revenue.csv
chicago.csv                          salesmen.csv
ecommerce.csv                        worldstats.csv


In [10]:
pokemon_series = pd.read_csv(filepath_or_buffer="pokemon.csv", usecols=["Pokemon"]).squeeze()

In [11]:
stock_series = pd.read_csv(filepath_or_buffer="google_stock_price.csv").squeeze()

## Head and tail 

In [12]:
pokemon_series.head(8)

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
7     Wartortle
Name: Pokemon, dtype: object

In [13]:
stock_series.tail(3)

3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

## Python Series to Python builtin functions

In [14]:
print(f"Len {len(pokemon_series)}")
print(f"Type {type(pokemon_series)}")
print(f"Dir {dir(pokemon_series)}")
print(f"Sorted {sorted(pokemon_series)}")
print(f"As dict {dict(pokemon_series)}")
print(f"Max {max(stock_series)}")
print(f"Min {min(stock_series)}")

Len 721
Type <class 'pandas.core.series.Series'>
Dir ['T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_TO_AXIS_NUMBER', '_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__', '__array__', '__array_priority__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce

## Sort values

In [15]:
stock_series.sort_values(ascending=True).head()

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
Name: Stock Price, dtype: float64

In [16]:
stock_series.sort_values(ascending=False).head()

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
Name: Stock Price, dtype: float64

## Sort indexes

In [18]:
stock_series.sort_index(ascending=False)

3011    782.22
3010    771.61
3009    773.18
3008    771.07
3007    772.88
         ...  
4        52.95
3        52.38
2        54.65
1        54.10
0        50.12
Name: Stock Price, Length: 3012, dtype: float64

## Inclusion

`in` keyword in indexes:

In [21]:
0 in stock_series

True

`in` keyword in values:

In [22]:
782.22 in stock_series.values

True

## Extract by index or label

In [33]:
example_extraction = pd.Series(data={"bee": "gal", "doo": "Miryam"})
print(f"Extract by index: {example_extraction[0]}")
print(f"Extract by label: {example_extraction['bee']}")
print(f"Extract by index: {example_extraction.get(0)}")
print(f"Extract by index (default vlaue): {example_extraction.get(10, 'Nonsense')}")
print(f"Extract by label: {example_extraction.get('bee')}")
print(f"Extract by label (default vlaue): {example_extraction.get('ori', 'Nonsense')}")
print(f"Multiple (default vlaue): {example_extraction.get([0,20] , 'Nonsense')}")

Extract by index: gal
Extract by label: gal
Extract by index: gal
Extract by index (default vlaue): Nonsense
Extract by label: gal
Extract by label (default vlaue): Nonsense
Multiple (default vlaue): Nonsense


## Overwrite a Series value 

In [40]:
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [43]:
pokemon[1500] = "Gal"
pokemon.tail()

717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
1500          Gal
Name: Pokemon, dtype: object

In [46]:
pokemon[[1,2,4]] = ["Gal", "Miryam", "Ori"]
pokemon.head()

0     Bulbasaur
1           Gal
2        Miryam
3    Charmander
4           Ori
Name: Pokemon, dtype: object

In [49]:
pokemon = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon[["Bulbasaur", "Ivysaur"]] = ["Gal", "Miryam"]
pokemon.head()

Pokemon
Bulbasaur        Gal
Ivysaur       Miryam
Venusaur       Grass
Charmander      Fire
Charmeleon      Fire
Name: Type, dtype: object

## Copy method

When not cooping it will have side effect by changing underlying objects:

In [54]:
pokemon_df = pd.read_csv("pokemon.csv", index_col="Pokemon")
pokemon_series = pokemon_df.squeeze("columns")

In [56]:
pokemon_series[0] = "Whatever"
pokemon_series.head()

Pokemon
Bulbasaur     Whatever
Ivysaur          Grass
Venusaur         Grass
Charmander        Fire
Charmeleon        Fire
Name: Type, dtype: object

In [57]:
pokemon_df.head()

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Whatever
Ivysaur,Grass
Venusaur,Grass
Charmander,Fire
Charmeleon,Fire


In [58]:
pokemon_df = pd.read_csv("pokemon.csv", index_col="Pokemon")
pokemon_series = pokemon_df.squeeze("columns").copy()

In [59]:
pokemon_series[0] = "Whatever"
pokemon_series.head()

Pokemon
Bulbasaur     Whatever
Ivysaur          Grass
Venusaur         Grass
Charmander        Fire
Charmeleon        Fire
Name: Type, dtype: object

In [60]:
pokemon_df.head()

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass
Ivysaur,Grass
Venusaur,Grass
Charmander,Fire
Charmeleon,Fire


## Inplace parameter (Going to be deprecated)

In [71]:
stock_google_series = (
    pd
    .read_csv("google_stock_price.csv", usecols=["Stock Price"])
    .squeeze("columns")
    .copy()
)

In [72]:
stock_google_series.sort_values()
stock_google_series.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [75]:
stock_google_series.sort_values(inplace=True)
stock_google_series.head()

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
Name: Stock Price, dtype: float64

In [76]:
stock_google_series = (
    pd
    .read_csv("google_stock_price.csv", usecols=["Stock Price"])
    .squeeze("columns")
)
stock_google_series.sort_values(inplace=True)

ValueError: This Series is a view of some other array, to sort in-place you must create a copy

## Math methods on Series object

In [80]:
print(f"Count (Not missing values): {stock_google_series.count()}")
print(f"Sum: {stock_google_series.sum()}")
print(f"Mean: {stock_google_series.mean()}")
print(f"Product: {stock_google_series.product()}")
print(f"Min: {stobck_google_series.min()}")
print(f"Max: {stock_google_series.max()}")
print(f"Average: {stock_google_series.max()}")
stock_google_series.describe()

Count (Not missing values): 3012
Sum: 1006942.0
Mean: 334.31009296148744
Product: inf
Min: 49.95
Max: 782.22
Average: 782.22


count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

## Broadcasting (Performing meth operation on all series values)

In [83]:
stock_google_series = (
    pd
    .read_csv("google_stock_price.csv", usecols=["Stock Price"])
    .squeeze("columns")
)
stock_google_series.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [86]:
stock_google_series + 10
stock_google_series.add(10)
stock_google_series - 10
stock_google_series * 10

0        501.2
1        541.0
2        546.5
3        523.8
4        529.5
         ...  
3007    7728.8
3008    7710.7
3009    7731.8
3010    7716.1
3011    7822.2
Name: Stock Price, Length: 3012, dtype: float64

## The value_counts method

In [95]:
pokemon = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon.headad()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [96]:
pokemon.value_counts().head()

Water     105
Normal     93
Grass      66
Bug        63
Fire       47
Name: Type, dtype: int64

In [98]:
pokemon.value_counts(normalize=True) * 100

Water       14.563107
Normal      12.898752
Grass        9.153953
Bug          8.737864
Fire         6.518724
Psychic      6.518724
Rock         5.686546
Electric     4.993065
Ground       4.160888
Poison       3.883495
Dark         3.883495
Fighting     3.467406
Dragon       3.328710
Ghost        3.190014
Ice          3.190014
Steel        3.051318
Fairy        2.357836
Flying       0.416089
Name: Type, dtype: float64

In [99]:
pokemon.value_counts(sort=False)

Grass        66
Fire         47
Water       105
Bug          63
Normal       93
Poison       28
Electric     36
Ground       30
Fairy        17
Fighting     25
Psychic      47
Rock         41
Ghost        23
Ice          23
Dragon       24
Dark         28
Steel        22
Flying        3
Name: Type, dtype: int64

In [100]:
len(pokemon)

721

## Apply method

In [101]:
pokemon.apply(len)

Pokemon
Bulbasaur     5
Ivysaur       5
Venusaur      5
Charmander    4
Charmeleon    4
             ..
Yveltal       4
Zygarde       6
Diancie       4
Hoopa         7
Volcanion     4
Name: Type, Length: 721, dtype: int64

In [102]:
def rank_pokemon(pokemon_type):
    if pokemon_type in ["Grass", "Fire", "Water"]:
        return "Classic"
    elif pokemon_type == "Normal":
        return "Boring"
    return "TBD"

pokemon.apply(rank_pokemon)

Pokemon
Bulbasaur     Classic
Ivysaur       Classic
Venusaur      Classic
Charmander    Classic
Charmeleon    Classic
               ...   
Yveltal           TBD
Zygarde           TBD
Diancie           TBD
Hoopa             TBD
Volcanion     Classic
Name: Type, Length: 721, dtype: object

## Map method

In [104]:
mapping = {
    "Grass": "Classic",
    "Fire": "Classic",
    "Water": "Classic",
    "Normal": "Boring"
}
pokemon.map(mapping)

Pokemon
Bulbasaur     Classic
Ivysaur       Classic
Venusaur      Classic
Charmander    Classic
Charmeleon    Classic
               ...   
Yveltal           NaN
Zygarde           NaN
Diancie           NaN
Hoopa             NaN
Volcanion     Classic
Name: Type, Length: 721, dtype: object