In [1]:
import pandas as pd

### Create a Series object from a list

1-D labelled array, represents 1 column of data in a pandas dataset

In [2]:
people = ['Jack', 'Park', 'Hems', 'Parker']
pd.Series(people)

0      Jack
1      Park
2      Hems
3    Parker
dtype: object

### Create a Series object from a Dictionary

In [3]:
sushi = {
    'Salmon' : 'Orange',
    'Tuna' : 'Red',
    'Eel' : 'Brown'
}

pd.Series(sushi)

Salmon    Orange
Tuna         Red
Eel        Brown
dtype: object

Here, Series has a custom label, values and numeric index positions(not visible here). Unlike dicts, Series can have duplicate labels(keys in dicts).

### Intro to Methods

In [4]:
prices = pd.Series([3,4,5,6])
prices.sum()
prices.product()
prices.mean()

4.5

### Intro to Attributes

In [6]:
words = pd.Series(['like', 'dont', 'fry', 'pan'])
words

0    like
1    dont
2     fry
3     pan
dtype: object

In [7]:
words.size

4

In [8]:
words.is_unique

True

In [9]:
words.values   # stores values in ndarray

array(['like', 'dont', 'fry', 'pan'], dtype=object)

In [10]:
words.index

RangeIndex(start=0, stop=4, step=1)

In [11]:
type(words.values)

numpy.ndarray

In [12]:
words.dtype

dtype('O')

### Paramters and Arguments

In [9]:
fruits = ['apple', 'pear', 'orange', 'bannana', 'grape', 'cherry']  
days = ['Mon', 'Tue', 'Wed', 'Thurs', 'Fri', 'Fri'] # Duplicate index possible in Series

pd.Series(fruits, days)  # shift + tab to get method param details
pd.Series(data = fruits, index = days) # Explicit  declaration of params

#pd.Series(fruits, index = weekdays) this also works if needed

Mon        apple
Tue         pear
Wed       orange
Thurs    bannana
Fri        grape
Fri       cherry
dtype: object

### Import Series with the pd.read_csv Function

In [10]:
pd.read_csv('pokemon.csv') # creates a pandas dataframe by default(Imports into a dataframe), dataframe used for 1 col or more

Unnamed: 0,Pokemon,Type
0,Bulbasaur,Grass
1,Ivysaur,Grass
2,Venusaur,Grass
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
716,Yveltal,Dark
717,Zygarde,Dragon
718,Diancie,Rock
719,Hoopa,Psychic


In [11]:
pd.read_csv('pokemon.csv', usecols = ['Pokemon']) # Still gives a dataframe

Unnamed: 0,Pokemon
0,Bulbasaur
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon
...,...
716,Yveltal
717,Zygarde
718,Diancie
719,Hoopa


In [45]:
pokemon = pd.read_csv('pokemon.csv', usecols = ['Pokemon']).squeeze("columns") 
pokemon
# squeeze the df, squeeze() creates a series from 1 col of data

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
716       Yveltal
717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
Name: Pokemon, Length: 721, dtype: object

In [19]:
google = pd.read_csv('google_stock_price.csv', usecols = ['Stock Price']).squeeze()
google

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

### Head and Tail methods on a Series

In [22]:
pokemon.head(n = 7) # returns first n rows of a series, n=5 is default

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
Name: Pokemon, dtype: object

In [24]:
pokemon.tail(n = 4), # returns last n rows of a series, n=5 is default

(717      Zygarde
 718      Diancie
 719        Hoopa
 720    Volcanion
 Name: Pokemon, dtype: object,)

### Python Built in Functions with Series

In [26]:
len(pokemon)
type(pokemon)

pandas.core.series.Series

In [None]:
dir(pokemon) # gives all the methods available for datatype

In [38]:
sorted(pokemon) # sorts the series
type(sorted(pokemon)) # returns a list

list

In [None]:
list(pokemon) # coverts series to list

In [None]:
list(pokemon) # converts series to dict (be careful that there is no duplicate for index in series while converting)

In [35]:
max(pokemon) # min, max values of a series
min(pokemon)

'Abomasnow'

### The sort_values Method

In [41]:
pokemon.sort_values(ascending = False).head() # sorts by values and returns a series, descending

717     Zygarde
633    Zweilous
40        Zubat
569       Zorua
570     Zoroark
Name: Pokemon, dtype: object

### The sort_index Method

In [42]:
pokemon = pd.read_csv('pokemon.csv', index_col = 'Pokemon').squeeze() # 2 cols in series, Pokemon used as index col
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [43]:
pokemon.sort_index(ascending = True) # sorts series by index in ascending order

Pokemon
Abomasnow      Grass
Abra         Psychic
Absol           Dark
Accelgor         Bug
Aegislash      Steel
              ...   
Zoroark         Dark
Zorua           Dark
Zubat         Poison
Zweilous        Dark
Zygarde       Dragon
Name: Type, Length: 721, dtype: object

### In keyword in Series

In [54]:
pokemon = pd.read_csv('Pokemon.csv', usecols = ['Pokemon']).squeeze()
pokemon

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
716       Yveltal
717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
Name: Pokemon, Length: 721, dtype: object

In [48]:
'Bulbasaur' in pokemon

False

In [50]:
100 in pokemon    # In case of Series, 'in' checks by default in index column (can use pokemon.index as well)

True

In [52]:
'Bulbasaur' in pokemon.values   # Uses 'in' to check in values col of a Series

True

### Extract Series Value by Index Position

In [55]:
pokemon = pd.read_csv('Pokemon.csv', usecols = ['Pokemon']).squeeze('columns')
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [57]:
pokemon[0] # element extraction similar to list

'Bulbasaur'

In [58]:
pokemon[[100, 200, 300]] # extract values at multiple indexes in Series

100    Electrode
200        Unown
300     Delcatty
Name: Pokemon, dtype: object

In [59]:
pokemon[24: 29] # splicing

24      Pikachu
25       Raichu
26    Sandshrew
27    Sandslash
28      Nidoran
Name: Pokemon, dtype: object

In [66]:
# pokemon[-1]  doesnt work with Pandas Series
pokemon[-15:-11] # when used as range, -ve indexes work in Pandas series

706       Klefki
707     Phantump
708    Trevenant
709    Pumpkaboo
Name: Pokemon, dtype: object

### Extract Series Value by Index Label

In [67]:
pokemon = pd.read_csv('pokemon.csv', index_col = 'Pokemon').squeeze('columns')
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [68]:
pokemon[[0,2,3]] # extracting by numerical index position still works even with label

Pokemon
Bulbasaur     Grass
Venusaur      Grass
Charmander     Fire
Name: Type, dtype: object

In [69]:
pokemon['Charizard']
pokemon['Digimon'] # causes error since Digimon label does not exist
pokemon[['Mewtwo', 'Sceptile', 'Blastoise']] # extracting by index label

Pokemon
Mewtwo       Psychic
Sceptile       Grass
Blastoise      Water
Name: Type, dtype: object

### The get Method on a Series

In [3]:
pokemon = pd.read_csv('pokemon.csv', index_col = 'Pokemon').squeeze('columns')
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

get() extracts values from a Series. Has a fallback case in case the index/label doesnt exist in the Series

In [5]:
pokemon.get(0)
pokemon.get('Mewtwo')
pokemon.get(['Mewtwo', 'Pikachu'])

Pokemon
Mewtwo      Psychic
Pikachu    Electric
Name: Type, dtype: object

In [7]:
pokemon.get(['Digimon', 'Mewtwo'], 'Nonexistent') # has fallback, default fallback is None
pokemon.get([100, 200, 10000], 'Nonexistent')

'Nonexistent'

### Overwrite a Series Value

In [3]:
pokemon = pd.read_csv('Pokemon.csv', usecols = ['Pokemon']).squeeze('columns')
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [5]:
pokemon[0] = "Bulbi" # overwrites the value at index 0 position
pokemon[0]

'Bulbi'

In [7]:
pokemon[1500] = "Sunny" # creates a new entry at index 1500, doesnt fill in the gaps
pokemon

0            Bulbi
1          Ivysaur
2         Venusaur
3       Charmander
4       Charmeleon
           ...    
717        Zygarde
718        Diancie
719          Hoopa
720      Volcanion
1500         Sunny
Name: Pokemon, Length: 722, dtype: object

In [9]:
pokemon[[1,2,4]] = ['Flamon', 'Iceon', 'Wateron'] # replaces values at the 1,2,4 index position
pokemon.head()

0         Bulbi
1        Flamon
2         Iceon
3    Charmander
4       Wateron
Name: Pokemon, dtype: object

In [10]:
pokemon = pd.read_csv('pokemon.csv', index_col = 'Pokemon').squeeze('columns')
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [11]:
pokemon['Bulbasaur'] = 'Awwesomeon' # overwrite value based on index label
pokemon.head()

Pokemon
Bulbasaur     Awwesomeon
Ivysaur            Grass
Venusaur           Grass
Charmander          Fire
Charmeleon          Fire
Name: Type, dtype: object

In [14]:
pokemon[1] = 'Grasson' # this will also still work here
pokemon.head()

Pokemon
Bulbasaur     Awwesomeon
Ivysaur          Grasson
Venusaur           Grass
Charmander          Fire
Charmeleon          Fire
Name: Type, dtype: object

### The copy Method

In [15]:
pokemon_df = pd.read_csv('pokemon.csv', usecols = ['Pokemon']) # Dataframe
pokemon_series = pokemon_df.squeeze('columns') # Series

In [16]:
pokemon_series[0] = 'Whatever' # updating value in Series
pokemon_series.head(1)

0    Whatever
Name: Pokemon, dtype: object

In [17]:
pokemon_df # value gets updated in Df as well even though we only updated Series

Unnamed: 0,Pokemon
0,Whatever
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon
...,...
716,Yveltal
717,Zygarde
718,Diancie
719,Hoopa


Series is like a view of the larger Dataframe (Not a seperate copy). Thats why Dataframe value gets updated as well and is not treated seperatly. 

Analogy : Paint the door(series) of the house(Df) red. So, the color of the door of the dataframe also changes because it is a part(view) of the house

In [18]:
pokemon_df = pd.read_csv('pokemon.csv', usecols = ['Pokemon']) # Dataframe
pokemon_series = pokemon_df.squeeze('columns').copy() 

# Series here works as a copy completely independent copy of original data source cuz of copy method and not as a view.

# Series here is a distinct entity, changes made to it will not effect original datasource(df)

In [19]:
pokemon_series[0] = 'Whatever' # updating value in Series
pokemon_series.head(1)

0    Whatever
Name: Pokemon, dtype: object

In [20]:
pokemon_df # Value does not get updated as series, pokemon_series, is created as a copy.

Unnamed: 0,Pokemon
0,Bulbasaur
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon
...,...
716,Yveltal
717,Zygarde
718,Diancie
719,Hoopa


### The inplace Parameter

In [2]:
google = (
    pd.read_csv('google_stock_price.csv', usecols = ['Stock Price'])
    .squeeze('columns')
    .copy()
)
google

# series is decoupled from df
# pre req for inplace parameter for mutations -> object(series) cannot be a view, has to be a copy

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [29]:
google = google.sort_values() # sorting the series and reassging to the variabl, to change the original
google.head()

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
Name: Stock Price, dtype: float64

In [31]:
# Instead of that, we can use inplace parameter

google.sort_values(inplace = True) # this also changes original -> inplace, might be deprecated
google.head()

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
Name: Stock Price, dtype: float64

### Math Methods on Series Objects

In [6]:
google.count() # counts non-null data -> valid rows
google.sum()
google.mean()
google.product()
google.std() #standard dev
google.min()
google.max()
google.median()
google.mode()
google.describe() # gives general stats as output in a series

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

### Broadcasting

How to perform math operation throughout every element of a series -> Broadcasting

In [8]:
google + 10  # adds 10 to every element of the series, google.
google -30
google * 2

0        100.24
1        108.20
2        109.30
3        104.76
4        105.90
         ...   
3007    1545.76
3008    1542.14
3009    1546.36
3010    1543.22
3011    1564.44
Name: Stock Price, Length: 3012, dtype: float64

### The value_counts Method

Counts the number of times a value occurs in a series and returns it


In [10]:
pokemon = pd.read_csv('pokemon.csv', index_col = 'Pokemon').squeeze('columns')
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [13]:
pokemon.value_counts()
pokemon.value_counts(sort = False)
pokemon.value_counts(ascending = True).head() # counts value occurences in series

Grass        66
Fire         47
Water       105
Bug          63
Normal       93
Poison       28
Electric     36
Ground       30
Fairy        17
Fighting     25
Psychic      47
Rock         41
Ghost        23
Ice          23
Dragon       24
Dark         28
Steel        22
Flying        3
Name: Type, dtype: int64

In [15]:
pokemon.value_counts(normalize = True) * 100 # Displays occurences in the form of percentages

Water       14.563107
Normal      12.898752
Grass        9.153953
Bug          8.737864
Fire         6.518724
Psychic      6.518724
Rock         5.686546
Electric     4.993065
Ground       4.160888
Poison       3.883495
Dark         3.883495
Fighting     3.467406
Dragon       3.328710
Ghost        3.190014
Ice          3.190014
Steel        3.051318
Fairy        2.357836
Flying       0.416089
Name: Type, dtype: float64

### The apply Method

Applies a function to every value of a Series and returns a new Series

In [16]:
len('Grass')

5

In [17]:
pokemon.apply(len) # applies len to all series values and returns a series.

Pokemon
Bulbasaur     5
Ivysaur       5
Venusaur      5
Charmander    4
Charmeleon    4
             ..
Yveltal       4
Zygarde       6
Diancie       4
Hoopa         7
Volcanion     4
Name: Type, Length: 721, dtype: int64

In [19]:
# Applying a custom function to all series values

def rank_pokemon(pokemon_type):
    if pokemon_type in ['Grass', 'Water', 'Fire']:
        return 'Classic'
    elif pokemon_type == 'Normal':
        return 'Boring'
    else:
        return 'TBD'

In [21]:
pokemon.apply(rank_pokemon) # applies custom func to all series values

Pokemon
Bulbasaur     Classic
Ivysaur       Classic
Venusaur      Classic
Charmander    Classic
Charmeleon    Classic
               ...   
Yveltal           TBD
Zygarde           TBD
Diancie           TBD
Hoopa             TBD
Volcanion     Classic
Name: Type, Length: 721, dtype: object

### The map Method

In [23]:
pokemon = pd.read_csv('pokemon.csv', index_col = 'Pokemon').squeeze('columns')
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

Apply an associative obj(dict, another series) to all series values. 

In [24]:
mappings  = {
    'Grass' : 'Classic',
    'Fire' : 'Classsic',
    'Water' : 'Classic',
    'Normal' : 'Boring'
}

pokemon.map(mappings) # NAN is because those values dont have a pair as listed in the above map

Pokemon
Bulbasaur      Classic
Ivysaur        Classic
Venusaur       Classic
Charmander    Classsic
Charmeleon    Classsic
                ...   
Yveltal            NaN
Zygarde            NaN
Diancie            NaN
Hoopa              NaN
Volcanion     Classsic
Name: Type, Length: 721, dtype: object

In [25]:
# Using a series as a param to map()
mapping_series = pd.Series(mappings)
pokemon.map(mapping_series)

Pokemon
Bulbasaur      Classic
Ivysaur        Classic
Venusaur       Classic
Charmander    Classsic
Charmeleon    Classsic
                ...   
Yveltal            NaN
Zygarde            NaN
Diancie            NaN
Hoopa              NaN
Volcanion     Classsic
Name: Type, Length: 721, dtype: object