In [None]:
!pip install pandas

In [1]:
import pandas as pd
import numpy as np

## Pandas Series

##### Let's create a Series holding the number of days in each month of Calendar Year 2021

In [2]:
# Days per month for calendar year 2021, January first.
# No index is given, so pandas assigns the default positional labels 0..11.
cy2021 = pd.Series(
    data=[
        31, 28, 31, 30, 31, 30,   # Jan .. Jun
        31, 31, 30, 31, 30, 31,   # Jul .. Dec
    ]
)

In [3]:
print(cy2021)

0     31
1     28
2     31
3     30
4     31
5     30
6     31
7     31
8     30
9     31
10    30
11    31
dtype: int64


In [4]:
pd.Series?

Init signature:
pd.Series(
    data=None,
    index=None,
    dtype=None,
    name=None,
    copy=False,
    fastpath=False,
)
Docstring:
One-dimensional ndarray with axis labels (including time series).

Labels need not be unique but must be a hashable type. The object
supports both integer- and label-based indexing and provides a host of
methods for performing operations involving the index. Statistical
methods from ndarray have been overridden to automatically exclude
missing data (currently represented as NaN).

In [5]:
# Rebuild the series from a dict so that the month numbers 1..12 become the
# index labels (dict keys -> labels, dict values -> data).
cy2021 = pd.Series(
    dict(zip(range(1, 13), [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]))
)

In [6]:
print(cy2021)

1     31
2     28
3     31
4     30
5     31
6     30
7     31
8     31
9     30
10    31
11    30
12    31
dtype: int64


In [None]:
print(cy2021.head())
print(cy2021.tail(3))

## Indexing

In [7]:
# The index now holds the labels 1..12, so plain [] performs a *label* lookup
# and 0 is not a valid label. The error is the point of this cell; it is caught
# and printed so that "Restart & Run All" can continue past the demonstration.
try:
    cy2021[0]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 0

In [8]:
cy2021[1]

31

#### `iloc` -- integer-location based indexing for selection by position

In [9]:
print(cy2021.iloc[0])
print(cy2021.iloc[-1])

31
31


In [10]:
print(cy2021.iloc[[0]])
print(cy2021.iloc[[-1]])

1    31
dtype: int64
12    31
dtype: int64


In [11]:
print(cy2021.index)

Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype='int64')


In [12]:
cy2021.index=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

In [13]:
print(cy2021.index)

Index(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
       'Nov', 'Dec'],
      dtype='object')


In [14]:
print(cy2021)

Jan    31
Feb    28
Mar    31
Apr    30
May    31
Jun    30
Jul    31
Aug    31
Sep    30
Oct    31
Nov    30
Dec    31
dtype: int64


#### `loc` -- access data by label(s) 

In [15]:
cy2021.loc['Feb']

28

In [17]:
cy2021.loc[['Jan','Feb']]

Jan    31
Feb    28
dtype: int64

In [16]:
cy2021.loc[['Feb']]

Feb    28
dtype: int64

#### Copy whole / partial data between Series

##### Let's create Calendar Year 2020 by copying 2021

In [18]:
# Plain assignment does NOT copy: it only binds a second name to the same
# Series object (the two id() values printed below are identical).
cy2020=cy2021

In [19]:
print(id(cy2020))

140672134934928


In [20]:
print(id(cy2021))

140672134934928


#### Create a new object with the copy of data

In [None]:
cy2020=pd.Series(data=cy2021, copy=True)

In [21]:
cy2020=cy2021.copy()

In [22]:
print(cy2020)

Jan    31
Feb    28
Mar    31
Apr    30
May    31
Jun    30
Jul    31
Aug    31
Sep    30
Oct    31
Nov    30
Dec    31
dtype: int64


In [23]:
# 2020 is a leap year: overwrite February in the independent copy only.
cy2020.loc['Feb'] = 29

In [24]:
print(cy2020)

Jan    31
Feb    29
Mar    31
Apr    30
May    31
Jun    30
Jul    31
Aug    31
Sep    30
Oct    31
Nov    30
Dec    31
dtype: int64


##### Create Fiscal Year 2021, starting in July

In [25]:
# First half of the fiscal year: Jul..Dec taken from calendar year 2020.
# .copy() detaches the slice so later edits do not touch cy2020.
second_half_2020 = cy2020.loc['Jul':]
fy2021 = second_half_2020.copy()

In [26]:
print(id(fy2021), id(cy2020))

140671864140224 140671864140080


In [27]:
print(fy2021)

Jul    31
Aug    31
Sep    30
Oct    31
Nov    30
Dec    31
dtype: int64


In [28]:
# Second half of the fiscal year: Jan..Jun taken from calendar year 2021
# (label slices include the end label, so 'Jun' is part of the result).
# Series.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to stack Series end to end.
fy2021 = pd.concat([fy2021, cy2021[:'Jun']])

In [29]:
print(fy2021)

Jul    31
Aug    31
Sep    30
Oct    31
Nov    30
Dec    31
Jan    31
Feb    28
Mar    31
Apr    30
May    31
Jun    30
dtype: int64


In [30]:
# Alternative construction: start from an all-zero skeleton whose index is
# Jul..Dec followed by Jan..Jun, then fill it in the cells below.
# The fill value 0 is passed explicitly instead of relying on how pandas
# fills a data-less integer Series, which is not a documented guarantee.
fy2021 = pd.Series(
    0,
    index=cy2020.index[6:].append(cy2021.index[:6]),
    dtype='int64',
)

In [31]:
print(fy2021)

Jul    0
Aug    0
Sep    0
Oct    0
Nov    0
Dec    0
Jan    0
Feb    0
Mar    0
Apr    0
May    0
Jun    0
dtype: int64


In [32]:
print(fy2021.loc['Jul':'Jun'])

Jul    0
Aug    0
Sep    0
Oct    0
Nov    0
Dec    0
Jan    0
Feb    0
Mar    0
Apr    0
May    0
Jun    0
dtype: int64


In [34]:
print(fy2021.loc['Jan':'Dec'])

Series([], dtype: int64)


In [35]:
# Positional assignment of a whole Series.
# NOTE(review): no output is recorded right after this cell, so it is not
# visible here whether the values land by position (cy2020's Jan..Dec order)
# or are aligned on labels -- confirm on the pandas version in use.
fy2021.iloc[:]=cy2020

In [37]:
# Label-based assignment: the values of cy2020 are matched to fy2021 by index
# label, so each month receives its own value despite the different ordering.
fy2021.loc[:]=cy2020

In [39]:
# Only the 'Feb' label is selected on the left-hand side, so only February is
# replaced -- with cy2021's February value (28, see the output below).
fy2021.loc[['Feb']]=cy2021

In [40]:
print(fy2021)

Jul    31
Aug    31
Sep    30
Oct    31
Nov    30
Dec    31
Jan    31
Feb    28
Mar    31
Apr    30
May    31
Jun    30
dtype: int64


### Indexes and Operations on Series

In [41]:
cy2020 - cy2021

Jan    0
Feb    1
Mar    0
Apr    0
May    0
Jun    0
Jul    0
Aug    0
Sep    0
Oct    0
Nov    0
Dec    0
dtype: int64

In [42]:
day_differences = cy2020 - fy2021

In [43]:
day_differences

Apr    0
Aug    0
Dec    0
Feb    1
Jan    0
Jul    0
Jun    0
Mar    0
May    0
Nov    0
Oct    0
Sep    0
dtype: int64

In [44]:
# Pitfall: assigning to .index only RELABELS the existing values in their
# current order; it does not reorder them. day_differences is sorted
# alphabetically at this point, so the labels Jan..Dec end up attached to the
# wrong values. Use reindex_like (shown below) to actually reorder.
day_differences.index=cy2021.index

In [47]:
day_differences

Apr    0
Aug    0
Dec    0
Feb    1
Jan    0
Jul    0
Jun    0
Mar    0
May    0
Nov    0
Oct    0
Sep    0
dtype: int64

In [46]:
day_differences = cy2020 - fy2021

In [48]:
day_differences.reindex_like(cy2020)

Jan    0
Feb    1
Mar    0
Apr    0
May    0
Jun    0
Jul    0
Aug    0
Sep    0
Oct    0
Nov    0
Dec    0
dtype: int64

In [49]:
day_differences.reindex_like(fy2021)

Jul    0
Aug    0
Sep    0
Oct    0
Nov    0
Dec    0
Jan    0
Feb    1
Mar    0
Apr    0
May    0
Jun    0
dtype: int64

### Boolean indexes

In [50]:
# Twelve flags, one per position: only positions 5, 6 and 7 are selected.
mask_boolean_vector = [False] * 5 + [True] * 3 + [False] * 4

In [51]:
print(cy2021[mask_boolean_vector])

Jun    30
Jul    31
Aug    31
dtype: int64


In [52]:
print(cy2021)

Jan    31
Feb    28
Mar    31
Apr    30
May    31
Jun    30
Jul    31
Aug    31
Sep    30
Oct    31
Nov    30
Dec    31
dtype: int64


In [53]:
# Same mask as the plain list, but labelled by month so that pandas can align
# it with the series it is applied to.
mask_boolean_vector_series = pd.Series(
    data=[False] * 5 + [True] * 3 + [False] * 4,
    index=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
)
print(mask_boolean_vector_series)

Jan    False
Feb    False
Mar    False
Apr    False
May    False
Jun     True
Jul     True
Aug     True
Sep    False
Oct    False
Nov    False
Dec    False
dtype: bool


In [54]:
print(fy2021[mask_boolean_vector_series])

Jul    31
Aug    31
Jun    30
dtype: int64


In [55]:
# A plain Python list carries no labels, so it is applied purely by position:
# the True flags at positions 5-7 select Dec, Jan and Feb from the Jul-first
# fy2021, not Jun/Jul/Aug as the labelled mask above does.
print(fy2021[mask_boolean_vector])

Dec    31
Jan    31
Feb    28
dtype: int64


In [56]:
cy2021>30

Jan     True
Feb    False
Mar     True
Apr    False
May     True
Jun    False
Jul     True
Aug     True
Sep    False
Oct     True
Nov    False
Dec     True
dtype: bool

In [57]:
cy2021[cy2021==30]

Apr    30
Jun    30
Sep    30
Nov    30
dtype: int64

In [58]:
mask_months_30days=(cy2021==30)
print(mask_months_30days)

Jan    False
Feb    False
Mar    False
Apr     True
May    False
Jun     True
Jul    False
Aug    False
Sep     True
Oct    False
Nov     True
Dec    False
dtype: bool


In [59]:
print(cy2021[mask_months_30days])

Apr    30
Jun    30
Sep    30
Nov    30
dtype: int64


In [60]:
cy2021[cy2020==29]

Feb    28
dtype: int64

## Pandas DataFrames

In [None]:
df_years = pd.DataFrame({'2020': cy2020, '2021': cy2021})
print(df_years)

In [None]:
df_years = pd.DataFrame({'2020': cy2020, '2021': cy2021, 'fy2021': fy2021})
print(df_years)

In [None]:
df_years = pd.DataFrame({'2020': cy2020, '2021': cy2021, 'fy2021': fy2021}, 
                           index=cy2020.index
                       )
print(df_years)

In [None]:
# Total number of days per column (axis=0 reduces over the rows).
# The built-in reduction is preferred over apply() with a NumPy callable.
df_years.sum(axis=0)

### Read from CSV

In [None]:
# Global "recovered" time series from the CSSEGISandData/COVID-19 repository.
# The URL is split across adjacent string literals only for readability.
recovered_csv_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_recovered_global.csv'
)
df_recvd = pd.read_csv(recovered_csv_url)

In [None]:
display(df_recvd)

### Collect info about a dataframe

In [None]:
df_recvd.info()

In [None]:
df_recvd.index

In [None]:
df_recvd.columns

In [None]:
df_recvd.axes

### Access by index/labels

In [None]:
df_recvd.iloc[194]

In [None]:
df_recvd[194:195]

In [None]:
df_recvd.iloc[194:195]

In [None]:
# Label-based slice: unlike the positional slices above, .loc includes the end
# label, so this selects the rows labelled 194 and 195 (assuming both exist).
df_recvd.loc[194:195]

In [None]:
df_recvd.Lat

In [None]:
df_recvd['Country/Region']

In [None]:
df_recvd[['Country/Region']]

In [None]:
df_recvd.iloc[194].loc['3/22/21']

In [None]:
df_recvd[df_recvd['Country/Region']=='Poland']

In [None]:
# One .loc call: boolean row mask for Poland plus the date column label.
is_poland = df_recvd['Country/Region'] == 'Poland'
df_recvd.loc[is_poland, '3/22/21']

In [None]:
df_recvd[df_recvd['Country/Region']=='Poland']['3/22/21']

## Have fun! 