In [16]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
conda list pandas

# packages in environment at C:\Users\shahp\anaconda3:
#
# Name                    Version                   Build  Channel
pandas                    1.3.4            py39h6214cd6_0  

Note: you may need to restart the kernel to use updated packages.


## The Series
The series is the object of the pandas library designed to represent 1-D data structures, similar to an array but with some additional features. It is composed of two arrays associated with each other i.e. index and value.

In [24]:
# Create a series using Series() constructor.
s = pd.Series([12,-8,73,9])
s

0    12
1    -8
2    73
3     9
dtype: int64

In [28]:
# Assigning index labels
s = pd.Series([12,-8,73,9,11],index=['a','b','c','d','e'])
s

a    12
b    -8
c    73
d     9
e    11
dtype: int64

In [29]:
# To individually see the two arrays that make up this data structure. 
s.values

array([12, -8, 73,  9, 11], dtype=int64)

In [30]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

## Selecting the Internal Elements
Just like indexing, by specifying the key

In [31]:
s[4]

11

In [32]:
# Or by using the index label
s['c']

73

In [33]:
# Series slcing
s[0:3]

a    12
b    -8
c    73
dtype: int64

In [34]:
# Slicing using labels
s[['a','c','e']]

a    12
c    73
e    11
dtype: int64

## Assigning Values to the Elements
Just like it is done in lists or arrays 

In [35]:
s[1] = 23

In [36]:
s

a    12
b    23
c    73
d     9
e    11
dtype: int64

In [37]:
s['e'] = -99

In [38]:
s

a    12
b    23
c    73
d     9
e   -99
dtype: int64

## Defining a Series from NumPy Arrays and other series

In [39]:
arr = np.array([1,2,3,4])
s2 = pd.Series(arr)
s2

0    1
1    2
2    3
3    4
dtype: int32

In [40]:
# Using pre defined Series object 's'
s3 = pd.Series(s)
s3

a    12
b    23
c    73
d     9
e   -99
dtype: int64

<i>VIMP : The values contained in the NumPy array or in the original series are not copied but are passed by reference. If the values change, then those changes will also reflect in the new series object.</i> 

In [41]:
s3

a    12
b    23
c    73
d     9
e   -99
dtype: int64

In [42]:
arr[2] = -25
arr

array([  1,   2, -25,   4])

In [43]:
s2

0     1
1     2
2   -25
3     4
dtype: int32

## Filtering Values

In [45]:
s

a    12
b    23
c    73
d     9
e   -99
dtype: int64

In [47]:
s[s<15]

a    12
d     9
e   -99
dtype: int64

## Operations and Mathematical Functions

In [48]:
s/2

a     6.0
b    11.5
c    36.5
d     4.5
e   -49.5
dtype: float64

In [49]:
# With numpy mathematical functions, we must specify the function np and the instance of the series passed as an argument.
np.log(s)

  result = getattr(ufunc, method)(*inputs, **kwargs)


a    2.484907
b    3.135494
c    4.290459
d    2.197225
e         NaN
dtype: float64

## Evaluating Values 
There are often duplicate values in a series. Then you may need to have more information about the samples, including existence of any duplicates and whether a certain value is present in the series. In this regard, you can declare a series in which there are many duplicate values.

In [52]:
serd = pd.Series([1,0,2,1,2,3],index=['white','white','blue','green','green','yellow'])
serd

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [53]:
# To know all the unique values contained in the series.
serd.unique()

array([1, 0, 2, 3], dtype=int64)

In [54]:
# A function similar to unique() is value_counts(), which returns the values with their respective frequiencies within a 
# series
serd.value_counts()

1    2
2    2
0    1
3    1
dtype: int64

In [55]:
serd

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [56]:
# isin() evaluates the membership, that is,  the given list of values. This function tells you if the values are contained
# in the datastructure. The boolean values returned can be very useful.
serd.isin([0,3])

white     False
white      True
blue      False
green     False
green     False
yellow     True
dtype: bool

In [57]:
serd[serd.isin([0,3])]

white     0
yellow    3
dtype: int64

## NaN Values


In [59]:
# Explicitly defining a NaN value and adding them to a data structure.
s2 = pd.Series([5,-3,np.nan,14])
s2

0     5.0
1    -3.0
2     NaN
3    14.0
dtype: float64

In [60]:
# Both isnull() and notnull() RETURN a series with boolean values.
s2.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [61]:
s2.notnull()

0     True
1     True
2    False
3     True
dtype: bool

In [62]:
# These functions are often places inside filters to make a condition
s2[s2.notnull()]

0     5.0
1    -3.0
3    14.0
dtype: float64

In [63]:
s2[s2.isnull()]

2   NaN
dtype: float64

## Series as Dictionaries 


In [64]:
mydict = {'red': 2000, 'blue': 1000, 'yellow': 500, 'orange': 1000}
s = pd.Series(mydict)
s

red       2000
blue      1000
yellow     500
orange    1000
dtype: int64

In [65]:
colors = ['red','yellow','orange','blue','green']
s = pd.Series(mydict,index=colors)
s

red       2000.0
yellow     500.0
orange    1000.0
blue      1000.0
green        NaN
dtype: float64

## Operations Between Series

In [66]:
s

red       2000.0
yellow     500.0
orange    1000.0
blue      1000.0
green        NaN
dtype: float64

In [67]:
mydict2 = {'red':400,'yellow':1000,'black':700}

In [69]:
myseries2 = pd.Series(mydict2)

In [70]:
s + myseries2

black        NaN
blue         NaN
green        NaN
orange       NaN
red       2400.0
yellow    1500.0
dtype: float64

You get a new object series in which only the items with the same label are added. All other labels present in one of the two series are still added to the result but have a NaN value.