In [1]:
import numpy as np
import pandas as pd

In [2]:
# Six-element Series built from a plain list; pandas supplies the default
# 0..5 integer index, and the NaN entry makes the values float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
# Bare last expression: the notebook renders the Series repr as the cell output.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive calendar days starting 2013-01-01; reused as the row index
# of the random-valued frames built later in the notebook.
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# 6x4 frame of standard-normal draws: rows labelled by `dates`, columns A-D.
# NOTE(review): no seed is set in the visible cells, so the numbers printed in
# the outputs will differ on every re-run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)

In [6]:
# Display the whole frame (only six rows, so no need for head()/tail()).
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.831629,0.892281,0.963239,0.262451
2013-01-02,1.447795,-0.574837,-0.679315,-0.985518
2013-01-03,1.023591,0.07483,-0.988148,0.327019
2013-01-04,-0.52006,-0.19567,1.2197,2.272401
2013-01-05,-1.55343,0.85614,-0.080261,-0.780524
2013-01-06,0.853564,-0.157926,-1.144326,0.004678


In [7]:
# Frame built from a mapping of column name -> values. The length-4 Series,
# array and Categorical fix the row count; the scalar entries (A, B, F) are
# repeated on every row. Each column keeps its own dtype.
df = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F="foo",
    )
)
df

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [8]:
# Per-column dtypes: one entry per column, confirming each column of the
# mixed frame kept its own type.
df.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [14]:
# First two rows of the frame.
# NOTE(review): the execution counts jump from In [8] to In [14] and then back
# to In [13], so these cells were run out of order; confirm the notebook
# still reproduces with Restart Kernel -> Run All.
df.head(2)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo


In [13]:
# Last row of the frame.
df.tail(1)

Unnamed: 0,A,B,C,D,E,F
3,1.0,2013-01-02,1.0,3,train,foo


In [15]:
# Row labels of the frame (here the integers 0..3).
df.index

Index([0, 1, 2, 3], dtype='int64')

In [16]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [17]:
# Convert the frame to a NumPy array. Because the columns have different
# dtypes, every value is boxed into one object-dtype array (the array's dtype
# is inspected separately in the next cell).
df.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [18]:
# The array dtype is object ('O'): it is the one NumPy dtype able to hold the
# float, datetime, integer, categorical and string values side by side.
df.to_numpy().dtype

dtype('O')

In [19]:
# Summary statistics. Only columns A-D appear in the result; the categorical
# (E) and object (F) columns are left out.
df.describe()

Unnamed: 0,A,B,C,D
count,4.0,4,4.0,4.0
mean,1.0,2013-01-02 00:00:00,1.0,3.0
min,1.0,2013-01-02 00:00:00,1.0,3.0
25%,1.0,2013-01-02 00:00:00,1.0,3.0
50%,1.0,2013-01-02 00:00:00,1.0,3.0
75%,1.0,2013-01-02 00:00:00,1.0,3.0
max,1.0,2013-01-02 00:00:00,1.0,3.0
std,0.0,,0.0,0.0


In [20]:
# Transpose: column names become the row labels and the row labels 0..3
# become the columns.
df.T

Unnamed: 0,0,1,2,3
A,1.0,1.0,1.0,1.0
B,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00
C,1.0,1.0,1.0,1.0
D,3,3,3,3
E,test,train,test,train
F,foo,foo,foo,foo


In [21]:
# NOTE(review): repeats the describe() call from cell In [19] and produces the
# same table; candidate for removal.
df.describe()

Unnamed: 0,A,B,C,D
count,4.0,4,4.0,4.0
mean,1.0,2013-01-02 00:00:00,1.0,3.0
min,1.0,2013-01-02 00:00:00,1.0,3.0
25%,1.0,2013-01-02 00:00:00,1.0,3.0
50%,1.0,2013-01-02 00:00:00,1.0,3.0
75%,1.0,2013-01-02 00:00:00,1.0,3.0
max,1.0,2013-01-02 00:00:00,1.0,3.0
std,0.0,,0.0,0.0


In [22]:
# Show the frame in its original row/column order before the sorting examples.
df

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [23]:
# Order the rows by index label, highest label first; returns a new frame.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D,E,F
3,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
0,1.0,2013-01-02,1.0,3,test,foo


In [24]:
# Order the columns by name in reverse (F ... A); returns a new frame.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,F,E,D,C,B,A
0,foo,test,3,1.0,2013-01-02,1.0
1,foo,train,3,1.0,2013-01-02,1.0
2,foo,test,3,1.0,2013-01-02,1.0
3,foo,train,3,1.0,2013-01-02,1.0


In [25]:
# Re-display df: the output is unchanged, so the sort_index calls above
# returned new frames rather than reordering df in place.
df

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [26]:
# Replace df with a fresh 6x4 frame of standard-normal draws indexed by
# `dates`; the selection examples that follow operate on this frame.
# NOTE(review): no seed is set in the visible cells, so these values (and the
# outputs derived from them) change on every re-run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.861423,0.17111,0.341813,0.40428
2013-01-02,0.966717,0.55866,1.665377,0.915034
2013-01-03,1.742624,2.1402,0.249053,0.400376
2013-01-04,-0.410969,-0.989028,-1.36813,0.672689
2013-01-05,-0.505898,0.574863,0.479468,0.39043
2013-01-06,2.44502,0.281749,0.801152,-1.262993


In [27]:
# Order the rows by the values in column B, smallest first.
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-04,-0.410969,-0.989028,-1.36813,0.672689
2013-01-01,-0.861423,0.17111,0.341813,0.40428
2013-01-06,2.44502,0.281749,0.801152,-1.262993
2013-01-02,0.966717,0.55866,1.665377,0.915034
2013-01-05,-0.505898,0.574863,0.479468,0.39043
2013-01-03,1.742624,2.1402,0.249053,0.400376


In [28]:
# Select a single column by name; the result keeps the date index.
df["A"]

2013-01-01   -0.861423
2013-01-02    0.966717
2013-01-03    1.742624
2013-01-04   -0.410969
2013-01-05   -0.505898
2013-01-06    2.445020
Freq: D, Name: A, dtype: float64

In [29]:
# A single selected column is a pandas Series, not a DataFrame.
type(df["A"])

pandas.core.series.Series

In [30]:
# Integer slice on the frame selects rows by position; the end position is
# excluded, so this yields the first three rows.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.861423,0.17111,0.341813,0.40428
2013-01-02,0.966717,0.55866,1.665377,0.915034
2013-01-03,1.742624,2.1402,0.249053,0.400376


In [31]:
# Slicing with date strings selects rows by index label; both endpoints are
# included, so 2013-01-03 is part of the result.
df["20130101":"20130103"]

Unnamed: 0,A,B,C,D
2013-01-01,-0.861423,0.17111,0.341813,0.40428
2013-01-02,0.966717,0.55866,1.665377,0.915034
2013-01-03,1.742624,2.1402,0.249053,0.400376


In [32]:
# Look up one row by its index label; the row comes back as a Series keyed by
# column name.
df.loc["20130103"]

A    1.742624
B    2.140200
C    0.249053
D    0.400376
Name: 2013-01-03 00:00:00, dtype: float64

In [33]:
# All rows (":"), restricted to the listed columns.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-0.861423,0.17111
2013-01-02,0.966717,0.55866
2013-01-03,1.742624,2.1402
2013-01-04,-0.410969,-0.989028
2013-01-05,-0.505898,0.574863
2013-01-06,2.44502,0.281749


In [35]:
# Label-based slicing on both axes; with .loc both slice endpoints are
# included (rows 2013-01-02..2013-01-04, columns A..B).
df.loc["20130102":"20130104", "A":"B"]

Unnamed: 0,A,B
2013-01-02,0.966717,0.55866
2013-01-03,1.742624,2.1402
2013-01-04,-0.410969,-0.989028
