In [4]:
import pandas as pd
import numpy as np

In [5]:
# Six consecutive daily timestamps beginning 2013-01-01; reused below as the
# row index of the demo frame.
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Draw the 6x4 standard-normal values from a seeded generator so the frame --
# and every output derived from it -- is identical on each Restart & Run All.
# Outputs saved from earlier, unseeded runs will not match these values.
rng = np.random.default_rng(seed=42)
df = pd.DataFrame(
    rng.standard_normal((6, 4)),
    index=dates,            # one row per day
    columns=list("ABCD"),   # four columns labelled A..D
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.059885,-0.340192,-1.737988,-0.245752
2013-01-02,0.156104,0.644826,-3.499711,-0.367861
2013-01-03,0.389586,-0.542111,0.5873,0.088924
2013-01-04,-0.39838,0.858711,1.085672,-0.279761
2013-01-05,1.583161,0.540904,-0.678522,-0.749487
2013-01-06,-1.168553,-0.079933,-0.026535,1.354619


In [17]:
# Build a small frame in which every column comes from a different kind of
# input (float scalar, timestamp, Series, ndarray, Categorical, string).
# Scalar entries are broadcast to the length of the array-like columns.
mixed_columns = {
    "A": 1.0,
    "B": pd.Timestamp("20130102"),
    "C": pd.Series(1, index=list(range(4)), dtype="float32"),
    "D": np.full(4, 3, dtype="int32"),
    "E": pd.Categorical(["test", "train"] * 2),
    "F": "foo",
}
df2 = pd.DataFrame(mixed_columns)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [18]:
# One dtype per column: the frame is heterogeneous, unlike a NumPy array.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [19]:
# Show the leading rows (up to 5, the default for head).
df2.head(n=5)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [20]:
# Show the trailing rows (up to 5, the default for tail).
df.tail(n=5)

Unnamed: 0,A,B,C,D
2013-01-02,0.13969,0.790932,0.989564,-0.323414
2013-01-03,-0.112434,0.766584,-0.394805,0.062674
2013-01-04,-0.931568,1.1761,-0.920873,2.2386
2013-01-05,0.29062,-1.32471,0.170706,0.857212
2013-01-06,-0.681026,-0.076779,1.071143,0.03811


In [22]:
# Row labels of the frame.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [23]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [24]:
# Underlying values as a NumPy array; the row and column labels are dropped.
df.to_numpy()

array([[-1.65288567,  0.52073899,  1.32561683,  1.17004729],
       [ 0.13968951,  0.7909315 ,  0.9895643 , -0.3234138 ],
       [-0.11243396,  0.76658428, -0.39480469,  0.06267385],
       [-0.93156783,  1.17610046, -0.92087287,  2.23859994],
       [ 0.29061986, -1.32471016,  0.1707057 ,  0.85721191],
       [-0.68102574, -0.07677858,  1.07114335,  0.03810987]])

In [25]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.491267,0.308811,0.373559,0.673872
std,0.739216,0.900685,0.903281,0.948737
min,-1.652886,-1.32471,-0.920873,-0.323414
25%,-0.868932,0.072601,-0.253427,0.044251
50%,-0.39673,0.643662,0.580135,0.459943
75%,0.076659,0.784845,1.050749,1.091838
max,0.29062,1.1761,1.325617,2.2386


In [26]:
# Swap rows and columns: dates become the column labels, A..D the row labels.
df.transpose()

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-1.652886,0.13969,-0.112434,-0.931568,0.29062,-0.681026
B,0.520739,0.790932,0.766584,1.1761,-1.32471,-0.076779
C,1.325617,0.989564,-0.394805,-0.920873,0.170706,1.071143
D,1.170047,-0.323414,0.062674,2.2386,0.857212,0.03811


In [27]:
# Reorder the columns by label, descending (D, C, B, A); rows are untouched.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,1.170047,1.325617,0.520739,-1.652886
2013-01-02,-0.323414,0.989564,0.790932,0.13969
2013-01-03,0.062674,-0.394805,0.766584,-0.112434
2013-01-04,2.2386,-0.920873,1.1761,-0.931568
2013-01-05,0.857212,0.170706,-1.32471,0.29062
2013-01-06,0.03811,1.071143,-0.076779,-0.681026


In [7]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values("B", ascending=True)

Unnamed: 0,A,B,C,D
2013-01-03,0.389586,-0.542111,0.5873,0.088924
2013-01-01,-1.059885,-0.340192,-1.737988,-0.245752
2013-01-06,-1.168553,-0.079933,-0.026535,1.354619
2013-01-05,1.583161,0.540904,-0.678522,-0.749487
2013-01-02,0.156104,0.644826,-3.499711,-0.367861
2013-01-04,-0.39838,0.858711,1.085672,-0.279761


In [9]:
# Selecting a single column by label returns a Series.
df["A"]

2013-01-01   -1.059885
2013-01-02    0.156104
2013-01-03    0.389586
2013-01-04   -0.398380
2013-01-05    1.583161
2013-01-06   -1.168553
Freq: D, Name: A, dtype: float64

In [10]:
# First three rows, selected by position.
df.iloc[:3]

Unnamed: 0,A,B,C,D
2013-01-01,-1.059885,-0.340192,-1.737988,-0.245752
2013-01-02,0.156104,0.644826,-3.499711,-0.367861
2013-01-03,0.389586,-0.542111,0.5873,0.088924


In [14]:
# Positional selection: rows 0-2 and columns 1-3 (end bounds are exclusive).
df.iloc[:3, 1:4]

Unnamed: 0,B,C,D
2013-01-01,-0.340192,-1.737988,-0.245752
2013-01-02,0.644826,-3.499711,-0.367861
2013-01-03,-0.542111,0.5873,0.088924


In [12]:
# All rows, restricted to columns A and B (selected by label).
df[["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-1.059885,-0.340192
2013-01-02,0.156104,0.644826
2013-01-03,0.389586,-0.542111
2013-01-04,-0.39838,0.858711
2013-01-05,1.583161,0.540904
2013-01-06,-1.168553,-0.079933


In [11]:
# Label-based row slice on the date index; both endpoints are included.
df.loc["20130102":"20130104"]

Unnamed: 0,A,B,C,D
2013-01-02,0.156104,0.644826,-3.499711,-0.367861
2013-01-03,0.389586,-0.542111,0.5873,0.088924
2013-01-04,-0.39838,0.858711,1.085672,-0.279761


In [19]:
import seaborn as sns

# Load the "titanic" example dataset a single time; every later cell reuses
# `boat`, so a second identical load is unnecessary.
boat = sns.load_dataset("titanic")

In [18]:
# Random 100-row sample for eyeballing the data. random_state pins the draw so
# the displayed rows are the same on every run.
boat.sample(100, random_state=42)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
173,0,3,male,21.00,0,0,7.9250,S,Third,man,True,,Southampton,no,True
803,1,3,male,0.42,0,1,8.5167,C,Third,child,False,,Cherbourg,yes,False
581,1,1,female,39.00,1,1,110.8833,C,First,woman,False,C,Cherbourg,yes,False
595,0,3,male,36.00,1,1,24.1500,S,Third,man,True,,Southampton,no,False
294,0,3,male,24.00,0,0,7.8958,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
300,1,3,female,,0,0,7.7500,Q,Third,woman,False,,Queenstown,yes,True
101,0,3,male,,0,0,7.8958,S,Third,man,True,,Southampton,no,True
544,0,1,male,50.00,1,0,106.4250,C,First,man,True,C,Cherbourg,no,False
13,0,3,male,39.00,1,5,31.2750,S,Third,man,True,,Southampton,no,False


In [21]:
# Passengers younger than five; rows with a missing age compare False and are
# therefore excluded.
boat.loc[boat["age"].lt(5)]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
10,1,3,female,4.0,1,1,16.7,S,Third,child,False,G,Southampton,yes,False
16,0,3,male,2.0,4,1,29.125,Q,Third,child,False,,Queenstown,no,False
43,1,2,female,3.0,1,2,41.5792,C,Second,child,False,,Cherbourg,yes,False
63,0,3,male,4.0,3,2,27.9,S,Third,child,False,,Southampton,no,False
78,1,2,male,0.83,0,2,29.0,S,Second,child,False,,Southampton,yes,False
119,0,3,female,2.0,4,2,31.275,S,Third,child,False,,Southampton,no,False
164,0,3,male,1.0,4,1,39.6875,S,Third,child,False,,Southampton,no,False
171,0,3,male,4.0,4,1,29.125,Q,Third,child,False,,Queenstown,no,False
172,1,3,female,1.0,1,1,11.1333,S,Third,child,False,,Southampton,yes,False
