In [1]:
import numpy as np
import pandas as pd

## Object creation

In [2]:
# A Series built from a plain list; no index is passed, so the labels are the
# default integers 0..5. np.nan marks the missing entry and the values are
# stored as float64 (see the output below).
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting 2013-01-01; used as the row index of
# the DataFrame built in the next cell.
dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# 6x4 frame of standard-normal draws: one row per entry in `dates`, columns
# labelled A-D. The global NumPy RNG is not seeded here, so the numbers shown
# in the recorded outputs will differ on a fresh run.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,3.206741,0.546167,1.063248,-0.139581
2013-01-02,0.696769,0.154739,-0.494991,0.689771
2013-01-03,1.505585,0.437031,1.08875,0.535271
2013-01-04,0.544482,-0.070716,0.86514,0.111277
2013-01-05,0.194684,0.533071,-2.236613,-0.873917
2013-01-06,0.278263,1.657618,1.183768,0.811066


In [6]:
# Build a frame from a dict of column specs, one entry per column. The scalar
# entries ('A', 'B', 'F') are repeated on every row, so they line up with the
# four-element array-like columns ('C', 'D', 'E').
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2


Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [9]:
# Each column of df2 carries its own dtype, as the listing below shows.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing data

In [12]:
# Peek at the top of the frame: the first five rows.
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,3.206741,0.546167,1.063248,-0.139581
2013-01-02,0.696769,0.154739,-0.494991,0.689771
2013-01-03,1.505585,0.437031,1.08875,0.535271
2013-01-04,0.544482,-0.070716,0.86514,0.111277
2013-01-05,0.194684,0.533071,-2.236613,-0.873917


In [13]:
# Peek at the bottom of the frame: the last five rows.
df.tail(n=5)

Unnamed: 0,A,B,C,D
2013-01-02,0.696769,0.154739,-0.494991,0.689771
2013-01-03,1.505585,0.437031,1.08875,0.535271
2013-01-04,0.544482,-0.070716,0.86514,0.111277
2013-01-05,0.194684,0.533071,-2.236613,-0.873917
2013-01-06,0.278263,1.657618,1.183768,0.811066


In [14]:
# Row labels of df: the DatetimeIndex it was constructed with.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Column labels of df.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [16]:
# The frame's values as a 2-D NumPy array; the index and column labels are
# not part of the result.
df.to_numpy()

array([[ 3.20674081,  0.5461671 ,  1.0632478 , -0.13958073],
       [ 0.69676934,  0.15473944, -0.49499143,  0.68977057],
       [ 1.50558512,  0.4370307 ,  1.08874972,  0.53527067],
       [ 0.54448209, -0.07071602,  0.86513996,  0.11127726],
       [ 0.19468359,  0.53307125, -2.2366131 , -0.87391699],
       [ 0.27826305,  1.65761771,  1.18376771,  0.81106613]])

In [17]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,1.071087,0.542985,0.244883,0.188981
std,1.145765,0.596842,1.36772,0.632683
min,0.194684,-0.070716,-2.236613,-0.873917
25%,0.344818,0.225312,-0.154959,-0.076866
50%,0.620626,0.485051,0.964194,0.323274
75%,1.303381,0.542893,1.082374,0.651146
max,3.206741,1.657618,1.183768,0.811066


In [18]:
# Transposed view: the dates become the columns and A-D become the row labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,3.206741,0.696769,1.505585,0.544482,0.194684,0.278263
B,0.546167,0.154739,0.437031,-0.070716,0.533071,1.657618
C,1.063248,-0.494991,1.08875,0.86514,-2.236613,1.183768
D,-0.139581,0.689771,0.535271,0.111277,-0.873917,0.811066


In [23]:
# Reorder the columns by label in descending order (D, C, B, A); the rows are
# left in their original order.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.139581,1.063248,0.546167,3.206741
2013-01-02,0.689771,-0.494991,0.154739,0.696769
2013-01-03,0.535271,1.08875,0.437031,1.505585
2013-01-04,0.111277,0.86514,-0.070716,0.544482
2013-01-05,-0.873917,-2.236613,0.533071,0.194684
2013-01-06,0.811066,1.183768,1.657618,0.278263


In [24]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-04,0.544482,-0.070716,0.86514,0.111277
2013-01-02,0.696769,0.154739,-0.494991,0.689771
2013-01-03,1.505585,0.437031,1.08875,0.535271
2013-01-05,0.194684,0.533071,-2.236613,-0.873917
2013-01-01,3.206741,0.546167,1.063248,-0.139581
2013-01-06,0.278263,1.657618,1.183768,0.811066
