In [1]:
import numpy as np
import pandas as pd

In [2]:
# A Series holding a missing value: as the display below shows, the integers
# are stored as floats (dtype float64) alongside the NaN.
s = pd.Series([1, 2, np.nan])

In [3]:
# Display the Series: index labels on the left, values on the right, dtype last.
s

0    1.0
1    2.0
2    NaN
dtype: float64

In [4]:
# Look up the element labelled 0; with the default 0..2 index this is the first value.
s[0]

1.0

In [5]:
# sum() skips NaN by default, so the result is 1 + 2 = 3.0 rather than NaN.
s.sum()

3.0

In [6]:
# Build a small frame from a mapping of column name -> column values, then show it.
df = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
df

Unnamed: 0,A,B
0,1,3
1,2,4


In [7]:
# Six consecutive daily rows starting 2021-07-27 and four columns A-D filled
# with standard-normal draws. The generator is not seeded, so the numbers
# differ on every run. This rebinds `df`, replacing the frame built above.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=pd.date_range('20210727', periods=6),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2021-07-27,-0.064797,2.310397,0.052132,-1.225728
2021-07-28,-0.121391,0.240072,-1.109627,0.164993
2021-07-29,0.119619,0.850873,0.725844,0.41119
2021-07-30,-0.058431,-0.510533,0.505219,0.02487
2021-07-31,0.14441,-0.514985,-1.192797,0.175636
2021-08-01,0.074873,1.477725,-0.042691,0.759562


In [8]:
# Peek at the first two rows.
df.head(2)

Unnamed: 0,A,B,C,D
2021-07-27,-0.064797,2.310397,0.052132,-1.225728
2021-07-28,-0.121391,0.240072,-1.109627,0.164993


In [9]:
# Peek at the last row.
df.tail(1)

Unnamed: 0,A,B,C,D
2021-08-01,0.074873,1.477725,-0.042691,0.759562


In [10]:
# Row labels: a DatetimeIndex with daily frequency.
df.index

DatetimeIndex(['2021-07-27', '2021-07-28', '2021-07-29', '2021-07-30',
               '2021-07-31', '2021-08-01'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Column labels, held in a plain Index of strings.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [12]:
# Extract the underlying data as a 2-D NumPy array (row/column labels are
# dropped). to_numpy() is the accessor pandas recommends over the older
# `.values` attribute; for this all-float frame the result is the same array.
df.to_numpy()

array([[-0.06479663,  2.31039749,  0.0521324 , -1.22572764],
       [-0.1213913 ,  0.24007206, -1.10962743,  0.16499253],
       [ 0.11961896,  0.85087301,  0.725844  ,  0.41119044],
       [-0.05843109, -0.51053281,  0.50521882,  0.0248696 ],
       [ 0.14441006, -0.51498533, -1.19279708,  0.17563585],
       [ 0.07487337,  1.47772543, -0.04269145,  0.75956229]])

In [13]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.015714,0.642258,-0.176987,0.051754
std,0.111028,1.127307,0.806564,0.67691
min,-0.121391,-0.514985,-1.192797,-1.225728
25%,-0.063205,-0.322882,-0.842893,0.0599
50%,0.008221,0.545473,0.00472,0.170314
75%,0.108433,1.321012,0.391947,0.352302
max,0.14441,2.310397,0.725844,0.759562


In [14]:
# Transpose: the columns A-D become rows and the dates become columns.
df.T

Unnamed: 0,2021-07-27,2021-07-28,2021-07-29,2021-07-30,2021-07-31,2021-08-01
A,-0.064797,-0.121391,0.119619,-0.058431,0.14441,0.074873
B,2.310397,0.240072,0.850873,-0.510533,-0.514985,1.477725
C,0.052132,-1.109627,0.725844,0.505219,-1.192797,-0.042691
D,-1.225728,0.164993,0.41119,0.02487,0.175636,0.759562


In [15]:
# Show the rows ordered by column A, smallest first (ascending is the default).
# The sorted frame is only displayed; it is not assigned back to `df`.
df.sort_values(by='A')

Unnamed: 0,A,B,C,D
2021-07-28,-0.121391,0.240072,-1.109627,0.164993
2021-07-27,-0.064797,2.310397,0.052132,-1.225728
2021-07-30,-0.058431,-0.510533,0.505219,0.02487
2021-08-01,0.074873,1.477725,-0.042691,0.759562
2021-07-29,0.119619,0.850873,0.725844,0.41119
2021-07-31,0.14441,-0.514985,-1.192797,0.175636


In [16]:
# Select one row by its date label; the string is matched against the
# DatetimeIndex and the row comes back as a Series keyed by column name.
df.loc['20210728']

A   -0.121391
B    0.240072
C   -1.109627
D    0.164993
Name: 2021-07-28 00:00:00, dtype: float64

In [17]:
# Label-based slicing includes BOTH endpoints (07-28 and 07-29 are returned);
# the second argument restricts the result to columns A and D.
df.loc['20210728':'20210729', ['A', 'D']]

Unnamed: 0,A,D
2021-07-28,-0.121391,0.164993
2021-07-29,0.119619,0.41119


In [18]:
# Mask with a boolean frame: the shape is preserved and every cell that is
# not > 0 shows up as missing (NaN) instead of being dropped.
df[df > 0]

Unnamed: 0,A,B,C,D
2021-07-27,,2.310397,0.052132,
2021-07-28,,0.240072,,0.164993
2021-07-29,0.119619,0.850873,0.725844,0.41119
2021-07-30,,,0.505219,0.02487
2021-07-31,0.14441,,,0.175636
2021-08-01,0.074873,1.477725,,0.759562


In [21]:
# Work on an independent copy so that adding a column leaves `df` untouched.
df2 = df.copy()
# The label column belongs on the copy: the following cell filters on
# df2['E'], which would raise KeyError if the column were added to `df` instead.
df2['E'] = ['one', 'two', 'three', 'four', 'five', 'six']
df2

Unnamed: 0,A,B,C,D,E
2021-07-27,-0.064797,2.310397,0.052132,-1.225728,one
2021-07-28,-0.121391,0.240072,-1.109627,0.164993,two
2021-07-29,0.119619,0.850873,0.725844,0.41119,three
2021-07-30,-0.058431,-0.510533,0.505219,0.02487,four
2021-07-31,0.14441,-0.514985,-1.192797,0.175636,five
2021-08-01,0.074873,1.477725,-0.042691,0.759562,six


In [22]:
# Keep only the rows whose E value is one of the listed labels.
df2[df2['E'].isin(['one', 'six'])]

Unnamed: 0,A,B,C,D,E
2021-07-27,-0.064797,2.310397,0.052132,-1.225728,one
2021-08-01,0.074873,1.477725,-0.042691,0.759562,six


In [25]:
# The integers 1..6 labelled with six consecutive days starting 2021-07-27.
# This rebinds `s`, replacing the Series defined earlier in the notebook.
s = pd.Series(
    list(range(1, 7)),
    index=pd.date_range('20210727', periods=6),
)

In [26]:
# Display the date-indexed Series; the footer reports its daily frequency and dtype.
s

2021-07-27    1
2021-07-28    2
2021-07-29    3
2021-07-30    4
2021-07-31    5
2021-08-01    6
Freq: D, dtype: int64

In [27]:
# Add (or overwrite) column E on `df` from the Series `s`. Values are matched
# to rows by index label, not by position; here both share the same six dates.
# Note this mutates `df` in place, so earlier displays of `df` are now stale.
df['E'] = s

In [36]:
# Show `df` after column E was added.
# NOTE(review): the recorded output has A == 2.0 on 2021-07-28, while every
# earlier display shows -0.121391 there. No visible cell makes that assignment
# and the execution counter jumps from 27 to 36, so the output presumably
# reflects cells that are no longer in the notebook. Re-run from a fresh kernel
# to confirm.
df

Unnamed: 0,A,B,C,D,E
2021-07-27,-0.064797,2.310397,0.052132,-1.225728,1
2021-07-28,2.0,0.240072,-1.109627,0.164993,2
2021-07-29,0.119619,0.850873,0.725844,0.41119,3
2021-07-30,-0.058431,-0.510533,0.505219,0.02487,4
2021-07-31,0.14441,-0.514985,-1.192797,0.175636,5
2021-08-01,0.074873,1.477725,-0.042691,0.759562,6


In [41]:
# Two independent 2x2 frames of standard-normal draws (unseeded, so the values
# change on every run). `df` is rebound here and no longer refers to the
# date-indexed frame used above.
df, df3 = pd.DataFrame(np.random.randn(2, 2)), pd.DataFrame(np.random.randn(2, 2))
# Plain-text dump of both frames, one after the other.
print(df, df3, sep='\n')

          0         1
0  0.599766  1.456788
1  0.244921 -0.845158
          0         1
0  0.192898 -0.522135
1  0.210687  0.799466


In [42]:
# Stack the two frames vertically. Each frame keeps its own row labels, so the
# labels 0 and 1 both appear twice in the result.
pd.concat([df, df3])

Unnamed: 0,0,1
0,0.599766,1.456788
1,0.244921,-0.845158
0,0.192898,-0.522135
1,0.210687,0.799466


In [51]:
# Take the first row of `df` as a Series; its index is df's column labels.
s = df.iloc[0]
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. Turn the
# Series into a one-row frame and concatenate everything in a single call;
# ignore_index=True renumbers the rows 0..4, which is what
# append(s, ignore_index=True) used to produce.
pd.concat([df, df3, s.to_frame().T], ignore_index=True)

Unnamed: 0,0,1
0,0.599766,1.456788
1,0.244921,-0.845158
2,0.192898,-0.522135
3,0.210687,0.799466
4,0.599766,1.456788


In [53]:
# A grouping key column alternating foo/bar next to four standard-normal draws
# (unseeded, so B differs on every run). Rebinds `df` once more.
df = pd.DataFrame({
    'A': ['foo', 'bar'] * 2,
    'B': np.random.randn(4),
})
df

Unnamed: 0,A,B
0,foo,0.333388
1,bar,1.355694
2,foo,0.26771
3,bar,0.943156


In [55]:
# Split the rows by their value in A and add up B within each group; the group
# keys (bar, foo) become the index of the result.
df.groupby('A').sum()

Unnamed: 0_level_0,B
A,Unnamed: 1_level_1
bar,2.29885
foo,0.601098


In [56]:
import pandas_datareader

In [59]:
# Fetch AAPL price data through pandas_datareader's 'yahoo' source, asking for
# everything from 2021-07-20 onwards, and display the resulting frame.
# NOTE(review): this needs network access and a working 'yahoo' backend, so the
# cell is not reproducible offline. Confirm the source is still supported.
# NOTE(review): the recorded output starts at 2021-07-19, one day before the
# requested start. Presumably a timezone effect in the data source; verify.
# `s` held a Series earlier in the notebook; here it is rebound to a DataFrame.
s = pandas_datareader.data.DataReader('AAPL', 'yahoo', '2021-07-20')
s

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-07-19,144.070007,141.669998,143.75,142.449997,121434600,142.449997
2021-07-20,147.100006,142.960007,143.460007,146.149994,96350000,146.149994
2021-07-21,146.130005,144.630005,145.529999,145.399994,74993500,145.399994
2021-07-22,148.199997,145.809998,145.940002,146.800003,77338200,146.800003
2021-07-23,148.720001,146.919998,147.550003,148.559998,71361600,148.559998
2021-07-26,149.830002,147.699997,148.270004,148.990005,72434089,148.990005
