# Object creation

In [1]:
s = pd.Series([1, 3, 5, np.nan, 6, 9])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    9.0
dtype: float64

In [2]:
dates = pd.date_range('20200622', periods=6)
dates

DatetimeIndex(['2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-27'],
              dtype='datetime64[ns]', freq='D')

In [3]:
df = pd.DataFrame(np.random.randn(6, 4), index = dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [4]:
df2 = pd.DataFrame({
    'a': 1.,
    'b': pd.Timestamp('20200622'),
    'c': pd.Series(1, index=list(range(4)), dtype = 'float32'),
    'd': np.array([3] * 4, dtype='int32'),
    'e': pd.Categorical(["test", "train", "test","train"]),
    'f': 'foo'
    
})
df2

Unnamed: 0,a,b,c,d,e,f
0,1.0,2020-06-22,1.0,3,test,foo
1,1.0,2020-06-22,1.0,3,train,foo
2,1.0,2020-06-22,1.0,3,test,foo
3,1.0,2020-06-22,1.0,3,train,foo


In [5]:
df2.dtypes # dtypes

a           float64
b    datetime64[ns]
c           float32
d             int32
e          category
f            object
dtype: object

# Viewing data

In [6]:
df.head()

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844


In [7]:
df.tail(2)

Unnamed: 0,A,B,C,D
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [8]:
df.index

DatetimeIndex(['2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-27'],
              dtype='datetime64[ns]', freq='D')

In [9]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [10]:
df2.to_numpy()

array([[1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [11]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.162876,-0.256298,0.063557,-0.702257
std,1.238758,0.883771,1.189773,0.717806
min,-2.070337,-1.500661,-1.764229,-1.622729
25%,-0.916345,-0.674357,-0.264423,-1.282554
50%,0.276966,-0.28447,-0.166357,-0.574084
75%,0.706192,0.100138,0.954382,-0.205834
max,1.003995,1.107792,1.447435,0.159151


In [12]:
df.T

Unnamed: 0,2020-06-22,2020-06-23,2020-06-24,2020-06-25,2020-06-26,2020-06-27
A,0.228981,-2.070337,0.32495,-1.29812,0.833273,1.003995
B,-0.727578,1.107792,0.1516,-1.500661,-0.054246,-0.514694
C,1.447435,-1.764229,-0.178319,-0.154394,-0.293124,1.323974
D,-0.732685,-0.415482,-1.622729,-0.135952,-1.465844,0.159151


In [13]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2020-06-22,-0.732685,1.447435,-0.727578,0.228981
2020-06-23,-0.415482,-1.764229,1.107792,-2.070337
2020-06-24,-1.622729,-0.178319,0.1516,0.32495
2020-06-25,-0.135952,-0.154394,-1.500661,-1.29812
2020-06-26,-1.465844,-0.293124,-0.054246,0.833273
2020-06-27,0.159151,1.323974,-0.514694,1.003995


In [14]:
df.sort_values('A')

Unnamed: 0,A,B,C,D
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [15]:
df

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [16]:
df[0:3] # slice exclude end #.

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729


In [17]:
df['2020-06-22':'2020-06-25'] # slice include end value.

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952


# Selection by label

In [18]:
df.loc['2020-06-22': '2020-06-24', ['B','C']]

Unnamed: 0,B,C
2020-06-22,-0.727578,1.447435
2020-06-23,1.107792,-1.764229
2020-06-24,0.1516,-0.178319


In [19]:
df.loc[dates[0], "A"]

0.22898107472740004

In [20]:
df.at[dates[0], "A"]

0.22898107472740004

# Selection by Position

In [21]:
df.iloc[3]

A   -1.298120
B   -1.500661
C   -0.154394
D   -0.135952
Name: 2020-06-25 00:00:00, dtype: float64

In [22]:
df.loc[dates[3]]

A   -1.298120
B   -1.500661
C   -0.154394
D   -0.135952
Name: 2020-06-25 00:00:00, dtype: float64

In [23]:
df.iloc[1:3, 1:3] # range slice

Unnamed: 0,B,C
2020-06-23,1.107792,-1.764229
2020-06-24,0.1516,-0.178319


In [24]:
df.iloc[[1,2,3], [0,2]] # select 

Unnamed: 0,A,C
2020-06-23,-2.070337,-1.764229
2020-06-24,0.32495,-0.178319
2020-06-25,-1.29812,-0.154394


# Boolean indexing

In [25]:
df

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [26]:
df[df['A']>0]

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,-0.727578,1.447435,-0.732685
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [27]:
df[df>0]

Unnamed: 0,A,B,C,D
2020-06-22,0.228981,,1.447435,
2020-06-23,,1.107792,,
2020-06-24,0.32495,0.1516,,
2020-06-25,,,,
2020-06-26,0.833273,,,
2020-06-27,1.003995,,1.323974,0.159151


In [28]:
df2 = df.copy()
df2['E'] = ['one','one','two','three','four','three']
df2

Unnamed: 0,A,B,C,D,E
2020-06-22,0.228981,-0.727578,1.447435,-0.732685,one
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482,one
2020-06-24,0.32495,0.1516,-0.178319,-1.622729,two
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952,three
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844,four
2020-06-27,1.003995,-0.514694,1.323974,0.159151,three


In [29]:
df2.E.isin(['one', 'two'])

2020-06-22     True
2020-06-23     True
2020-06-24     True
2020-06-25    False
2020-06-26    False
2020-06-27    False
Freq: D, Name: E, dtype: bool

In [30]:
df2[df2['E'].isin(['two','four'])]

Unnamed: 0,A,B,C,D,E
2020-06-24,0.32495,0.1516,-0.178319,-1.622729,two
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844,four


In [31]:
df2.E.isin(['two','four'])

2020-06-22    False
2020-06-23    False
2020-06-24     True
2020-06-25    False
2020-06-26     True
2020-06-27    False
Freq: D, Name: E, dtype: bool

# Setting

In [32]:
s1 = pd.Series([1, 2, 3, 4, 5, 6], index = pd.date_range('20200702', periods = 6))
s1

2020-07-02    1
2020-07-03    2
2020-07-04    3
2020-07-05    4
2020-07-06    5
2020-07-07    6
Freq: D, dtype: int64

In [33]:
df.at[dates[0], ['A']] = 0
df

Unnamed: 0,A,B,C,D
2020-06-22,0.0,-0.727578,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [34]:
df.iat[0, 1] = 0
df

Unnamed: 0,A,B,C,D
2020-06-22,0.0,0.0,1.447435,-0.732685
2020-06-23,-2.070337,1.107792,-1.764229,-0.415482
2020-06-24,0.32495,0.1516,-0.178319,-1.622729
2020-06-25,-1.29812,-1.500661,-0.154394,-0.135952
2020-06-26,0.833273,-0.054246,-0.293124,-1.465844
2020-06-27,1.003995,-0.514694,1.323974,0.159151


In [35]:
df.loc[:,'D'] = np.array([5]  * len(df))
df

Unnamed: 0,A,B,C,D
2020-06-22,0.0,0.0,1.447435,5
2020-06-23,-2.070337,1.107792,-1.764229,5
2020-06-24,0.32495,0.1516,-0.178319,5
2020-06-25,-1.29812,-1.500661,-0.154394,5
2020-06-26,0.833273,-0.054246,-0.293124,5
2020-06-27,1.003995,-0.514694,1.323974,5


In [36]:
df2 = df.copy()
df2[df2>0]= -df2
df2

Unnamed: 0,A,B,C,D
2020-06-22,0.0,0.0,-1.447435,-5
2020-06-23,-2.070337,-1.107792,-1.764229,-5
2020-06-24,-0.32495,-0.1516,-0.178319,-5
2020-06-25,-1.29812,-1.500661,-0.154394,-5
2020-06-26,-0.833273,-0.054246,-0.293124,-5
2020-06-27,-1.003995,-0.514694,-1.323974,-5


# Missing data

In [37]:
df1 = df.reindex(index = dates[0:4], columns = list(df.columns) + ['E'])
df1.loc[dates[0]:dates[1], 'E'] = 1
df1

Unnamed: 0,A,B,C,D,E
2020-06-22,0.0,0.0,1.447435,5,1.0
2020-06-23,-2.070337,1.107792,-1.764229,5,1.0
2020-06-24,0.32495,0.1516,-0.178319,5,
2020-06-25,-1.29812,-1.500661,-0.154394,5,


In [38]:
df1.dropna(how = 'any')

Unnamed: 0,A,B,C,D,E
2020-06-22,0.0,0.0,1.447435,5,1.0
2020-06-23,-2.070337,1.107792,-1.764229,5,1.0


In [39]:
df1.fillna(value = 5)

Unnamed: 0,A,B,C,D,E
2020-06-22,0.0,0.0,1.447435,5,1.0
2020-06-23,-2.070337,1.107792,-1.764229,5,1.0
2020-06-24,0.32495,0.1516,-0.178319,5,5.0
2020-06-25,-1.29812,-1.500661,-0.154394,5,5.0


In [40]:
pd.isna(df1)

Unnamed: 0,A,B,C,D,E
2020-06-22,False,False,False,False,False
2020-06-23,False,False,False,False,False
2020-06-24,False,False,False,False,True
2020-06-25,False,False,False,False,True


# Operations

## stats

In [41]:
df

Unnamed: 0,A,B,C,D
2020-06-22,0.0,0.0,1.447435,5
2020-06-23,-2.070337,1.107792,-1.764229,5
2020-06-24,0.32495,0.1516,-0.178319,5
2020-06-25,-1.29812,-1.500661,-0.154394,5
2020-06-26,0.833273,-0.054246,-0.293124,5
2020-06-27,1.003995,-0.514694,1.323974,5


In [42]:
df.mean()

A   -0.201040
B   -0.135035
C    0.063557
D    5.000000
dtype: float64

In [43]:
df.mean(1) # other axis

2020-06-22    1.611859
2020-06-23    0.568306
2020-06-24    1.324558
2020-06-25    0.511706
2020-06-26    1.371476
2020-06-27    1.703319
Freq: D, dtype: float64

In [44]:
s = pd.Series([1, 3, 5, np.nan, 6, 8], index = dates).shift(2)
s

2020-06-22    NaN
2020-06-23    NaN
2020-06-24    1.0
2020-06-25    3.0
2020-06-26    5.0
2020-06-27    NaN
Freq: D, dtype: float64

In [45]:
s1 = pd.Series([1, 3, 5, np.nan, 6, 8], index = dates).shift(1)
s1

2020-06-22    NaN
2020-06-23    1.0
2020-06-24    3.0
2020-06-25    5.0
2020-06-26    NaN
2020-06-27    6.0
Freq: D, dtype: float64

### Broadcastin Substraction

In [46]:
df.sub(s, axis='index') 

Unnamed: 0,A,B,C,D
2020-06-22,,,,
2020-06-23,,,,
2020-06-24,-0.67505,-0.8484,-1.178319,4.0
2020-06-25,-4.29812,-4.500661,-3.154394,2.0
2020-06-26,-4.166727,-5.054246,-5.293124,0.0
2020-06-27,,,,


# Apply

In [47]:
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D
2020-06-22,0.0,0.0,1.447435,5
2020-06-23,-2.070337,1.107792,-0.316795,10
2020-06-24,-1.745387,1.259391,-0.495114,15
2020-06-25,-3.043507,-0.241269,-0.649508,20
2020-06-26,-2.210234,-0.295516,-0.942632,25
2020-06-27,-1.206239,-0.81021,0.381342,30


In [48]:
df.apply(lambda x : x.max() - x.min())

A    3.074332
B    2.608452
C    3.211664
D    0.000000
dtype: float64

In [49]:
df.head()

Unnamed: 0,A,B,C,D
2020-06-22,0.0,0.0,1.447435,5
2020-06-23,-2.070337,1.107792,-1.764229,5
2020-06-24,0.32495,0.1516,-0.178319,5
2020-06-25,-1.29812,-1.500661,-0.154394,5
2020-06-26,0.833273,-0.054246,-0.293124,5


In [50]:
df.loc[dates[1],'A'] - df.loc[dates[3], 'B']

-0.569675993498878

In [51]:
df.max(), df.min()

(A    1.003995
 B    1.107792
 C    1.447435
 D    5.000000
 dtype: float64,
 A   -2.070337
 B   -1.500661
 C   -1.764229
 D    5.000000
 dtype: float64)

# Histogramming

In [52]:
s = pd.Series(np.random.randint(1, 7, size = 10))
s

0    3
1    4
2    4
3    5
4    5
5    1
6    1
7    6
8    4
9    3
dtype: int64

In [53]:
s.value_counts()

4    3
5    2
3    2
1    2
6    1
dtype: int64

# Merge

## Concat

In [54]:
df = pd.DataFrame(np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,-2.042715,1.102175,-0.214373,0.164772
1,0.263519,0.778942,-0.391478,0.227257
2,1.542997,0.695206,1.63892,-0.259747
3,-0.718554,0.101985,-0.889594,-0.084593
4,-0.234127,1.553949,1.622431,0.829412
5,-1.015276,-1.221205,-1.501135,1.024179
6,-1.572855,-0.261544,-0.57356,0.507912
7,-2.30135,-0.27931,0.371906,0.973609
8,-0.046992,-0.243677,0.975888,0.468358
9,-0.558788,-2.136094,0.675703,-0.342115


In [55]:
pieces = [df[:3], df[3:7], df[7:]]
pieces

[          0         1         2         3
 0 -2.042715  1.102175 -0.214373  0.164772
 1  0.263519  0.778942 -0.391478  0.227257
 2  1.542997  0.695206  1.638920 -0.259747,
           0         1         2         3
 3 -0.718554  0.101985 -0.889594 -0.084593
 4 -0.234127  1.553949  1.622431  0.829412
 5 -1.015276 -1.221205 -1.501135  1.024179
 6 -1.572855 -0.261544 -0.573560  0.507912,
           0         1         2         3
 7 -2.301350 -0.279310  0.371906  0.973609
 8 -0.046992 -0.243677  0.975888  0.468358
 9 -0.558788 -2.136094  0.675703 -0.342115]

In [56]:
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,-2.042715,1.102175,-0.214373,0.164772
1,0.263519,0.778942,-0.391478,0.227257
2,1.542997,0.695206,1.63892,-0.259747
3,-0.718554,0.101985,-0.889594,-0.084593
4,-0.234127,1.553949,1.622431,0.829412
5,-1.015276,-1.221205,-1.501135,1.024179
6,-1.572855,-0.261544,-0.57356,0.507912
7,-2.30135,-0.27931,0.371906,0.973609
8,-0.046992,-0.243677,0.975888,0.468358
9,-0.558788,-2.136094,0.675703,-0.342115


# Join

In [57]:
left = pd.DataFrame({'key':['foo', 'foo'], 'lval':[1, 2]})

In [58]:
right = pd.DataFrame({'key':['foo', 'foo'], 'rval':[4, 5]})

In [59]:
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [60]:
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [61]:
pd.merge(left, right, on = 'key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


# Grouping

In [62]:
df = pd.DataFrame({'A':['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                 'B':['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], 
                 'C': np.random.randn(8),
                 'D': np.random.randn(8)})
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.45666,-1.258844
1,bar,one,0.171392,0.781234
2,foo,two,-0.391223,-0.374179
3,bar,three,0.858426,-0.470003
4,foo,two,0.433002,-0.999468
5,bar,two,0.38205,1.671515
6,foo,one,0.795965,0.903871
7,foo,three,1.975045,1.254655


In [63]:
df.groupby(['A', 'B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.171392,0.781234
bar,three,0.858426,-0.470003
bar,two,0.38205,1.671515
foo,one,0.339305,-0.354973
foo,three,1.975045,1.254655
foo,two,0.041779,-1.373647


# Reshaping

## Stack

In [64]:
tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',
                    'foo', 'foo', 'qux', 'qux'],
                   ['one', 'two', 'one', 'two', 
                    'one', 'two', 'one', 'tow']]))
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'tow')]

In [65]:
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

In [66]:
df= pd.DataFrame(np.random.randn(8, 2), index = index, columns = ['A', 'B'])
df2 = df[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,1.650803,-2.528504
bar,two,0.313994,0.712965
baz,one,-0.01812,1.752708
baz,two,1.309468,0.21354


In [67]:
stacked = df2.stack()
stacked

first  second   
bar    one     A    1.650803
               B   -2.528504
       two     A    0.313994
               B    0.712965
baz    one     A   -0.018120
               B    1.752708
       two     A    1.309468
               B    0.213540
dtype: float64

In [68]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,1.650803,0.313994
bar,B,-2.528504,0.712965
baz,A,-0.01812,1.309468
baz,B,1.752708,0.21354


In [69]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,1.650803,-0.01812
one,B,-2.528504,1.752708
two,A,0.313994,1.309468
two,B,0.712965,0.21354


# Pivot tables

In [70]:
df = pd.DataFrame({'A':['one', 'one', 'two', 'three'] * 3,
                   'B':['A', 'B', 'C'] * 4,
                   'C':['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D': np.random.randn(12),
                   'E': np.random.randn(12)})
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.839239,1.974523
1,one,B,foo,0.454613,0.976532
2,two,C,foo,-1.652568,-1.469812
3,three,A,bar,1.691391,0.352112
4,one,B,bar,-0.605328,1.605465
5,one,C,bar,0.951547,0.54723
6,two,A,foo,1.726584,-0.42199
7,three,B,foo,-0.333213,-0.214344
8,one,C,foo,1.99312,-2.273747
9,one,A,bar,0.047812,0.649491


In [71]:
pd.pivot_table(df, values='D', index = ['A', 'B'], columns = ['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.047812,-0.839239
one,B,-0.605328,0.454613
one,C,0.951547,1.99312
three,A,1.691391,
three,B,,-0.333213
three,C,-1.223355,
two,A,,1.726584
two,B,-0.189033,
two,C,,-1.652568


# Time series

In [72]:
rng = pd.date_range('7/10/2020', periods = 100, freq = 'S')

In [73]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

In [74]:
ts

2020-07-10 00:00:00    245
2020-07-10 00:00:01    301
2020-07-10 00:00:02    429
2020-07-10 00:00:03     44
2020-07-10 00:00:04    197
                      ... 
2020-07-10 00:01:35    324
2020-07-10 00:01:36    409
2020-07-10 00:01:37    414
2020-07-10 00:01:38    273
2020-07-10 00:01:39     58
Freq: S, Length: 100, dtype: int64

In [75]:
ts.resample('5Min').sum()

2020-07-10    26293
Freq: 5T, dtype: int64

In [76]:
rng = pd.date_range('10/6/2020 00:00', periods=5, freq='D')
ts = pd.Series(np.random.randn(len(rng)), rng)
ts

2020-10-06    1.567202
2020-10-07    0.329614
2020-10-08   -2.933868
2020-10-09   -0.652666
2020-10-10   -0.663714
Freq: D, dtype: float64

In [77]:
ts_utc = ts.tz_localize('UTC')
ts_utc

2020-10-06 00:00:00+00:00    1.567202
2020-10-07 00:00:00+00:00    0.329614
2020-10-08 00:00:00+00:00   -2.933868
2020-10-09 00:00:00+00:00   -0.652666
2020-10-10 00:00:00+00:00   -0.663714
Freq: D, dtype: float64

In [78]:
ts_utc.tz_convert('Asia/Tokyo')

2020-10-06 09:00:00+09:00    1.567202
2020-10-07 09:00:00+09:00    0.329614
2020-10-08 09:00:00+09:00   -2.933868
2020-10-09 09:00:00+09:00   -0.652666
2020-10-10 09:00:00+09:00   -0.663714
Freq: D, dtype: float64

In [79]:
ts_utc.tz_convert('Asia/Seoul')

2020-10-06 09:00:00+09:00    1.567202
2020-10-07 09:00:00+09:00    0.329614
2020-10-08 09:00:00+09:00   -2.933868
2020-10-09 09:00:00+09:00   -0.652666
2020-10-10 09:00:00+09:00   -0.663714
Freq: D, dtype: float64

In [82]:
rng = pd.date_range('7/13/2020', periods = 5, freq='M')
rng

DatetimeIndex(['2020-07-31', '2020-08-31', '2020-09-30', '2020-10-31',
               '2020-11-30'],
              dtype='datetime64[ns]', freq='M')

In [83]:
ts = pd.Series(np.random.randn(len(rng)), index = rng)
ts

2020-07-31   -0.463494
2020-08-31   -0.381535
2020-09-30    0.276126
2020-10-31    1.243009
2020-11-30    1.297636
Freq: M, dtype: float64

In [85]:
ps = ts.to_period()
ps

2020-07   -0.463494
2020-08   -0.381535
2020-09    0.276126
2020-10    1.243009
2020-11    1.297636
Freq: M, dtype: float64

In [86]:
ps.to_timestamp()

2020-07-01   -0.463494
2020-08-01   -0.381535
2020-09-01    0.276126
2020-10-01    1.243009
2020-11-01    1.297636
Freq: MS, dtype: float64

In [87]:
prng = pd.period_range('2010Q1', '2020Q4', freq = 'Q-NOV')

In [88]:
ts = pd.Series(np.random.randn(len(prng)), prng)

In [89]:
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
ts.head()

2010-03-01 09:00   -0.225197
2010-06-01 09:00    2.031961
2010-09-01 09:00    0.314582
2010-12-01 09:00    0.734045
2011-03-01 09:00    0.678713
Freq: H, dtype: float64