# The pandas tutorial

https://pandas.pydata.org/docs/user_guide/10min.html

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [4]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [5]:
dates = pd.date_range("20130101", periods=6)

In [6]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))

In [8]:
df

Unnamed: 0,A,B,C,D
2013-01-01,-3.467033,-1.140586,0.12117,-0.80927
2013-01-02,-0.093654,-0.223228,-1.494727,-1.2982
2013-01-03,0.105552,-1.691886,-0.680365,-2.705899
2013-01-04,-0.849691,-0.717126,-0.595464,0.081849
2013-01-05,-0.248744,-0.523556,-1.209375,1.016126
2013-01-06,0.614254,0.469153,0.164882,0.528246


In [9]:
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [10]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [11]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [16]:
df2.bool

<bound method NDFrame.bool of      A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo>

In [13]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-3.467033,-1.140586,0.12117,-0.80927
2013-01-02,-0.093654,-0.223228,-1.494727,-1.2982
2013-01-03,0.105552,-1.691886,-0.680365,-2.705899
2013-01-04,-0.849691,-0.717126,-0.595464,0.081849
2013-01-05,-0.248744,-0.523556,-1.209375,1.016126


In [14]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.849691,-0.717126,-0.595464,0.081849
2013-01-05,-0.248744,-0.523556,-1.209375,1.016126
2013-01-06,0.614254,0.469153,0.164882,0.528246
