In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain list; pandas supplies a default integer index.
# The np.nan entry represents a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

Create a DataFrame by passing a NumPy array, together with a datetime index and labeled columns.

In [4]:
# Six consecutive calendar days starting on 2013-01-01 (daily frequency).
dates = pd.date_range(start='20130101', periods=6, freq='D')

In [5]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# 6x4 frame of standard-normal samples: one row per entry in `dates`,
# columns labeled 'A' through 'D'. No seed is set in the cells shown here,
# so the values change on every run.
df = pd.DataFrame(
    data=np.random.standard_normal(size=(6, 4)),
    index=dates,
    columns=list('ABCD'),
)

In [13]:
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.318001,0.86141,0.135379,1.101368
2013-01-02,-0.084994,0.207227,1.976452,-1.559154
2013-01-03,-0.60726,0.343403,-0.332542,-0.127118
2013-01-04,-0.989266,0.735019,-1.351984,0.773283
2013-01-05,-0.027134,-1.489874,0.558358,-0.975254
2013-01-06,-0.579149,0.528039,0.54847,-1.48874


Create a DataFrame from a dict of objects.

In [10]:
# Each dict key becomes a column. The scalar values ('A', 'B', 'F') are
# repeated on every row; the array-like values ('C', 'D', 'E') each hold
# four entries.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(['test', 'train', 'test', 'train']),
        'F': 'foo',
    }
)

In [11]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


The columns of the resulting DataFrame have different dtypes.

In [14]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

Here is how to view the top and bottom rows of the frame.

In [15]:
# First five rows (five is the default for head()).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-1.318001,0.86141,0.135379,1.101368
2013-01-02,-0.084994,0.207227,1.976452,-1.559154
2013-01-03,-0.60726,0.343403,-0.332542,-0.127118
2013-01-04,-0.989266,0.735019,-1.351984,0.773283
2013-01-05,-0.027134,-1.489874,0.558358,-0.975254


In [16]:
# Last five rows (five is the default for tail()).
df.tail(n=5)

Unnamed: 0,A,B,C,D
2013-01-02,-0.084994,0.207227,1.976452,-1.559154
2013-01-03,-0.60726,0.343403,-0.332542,-0.127118
2013-01-04,-0.989266,0.735019,-1.351984,0.773283
2013-01-05,-0.027134,-1.489874,0.558358,-0.975254
2013-01-06,-0.579149,0.528039,0.54847,-1.48874


Display the index and columns.

In [17]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [18]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

Convert the DataFrame to a NumPy array. The resulting array does not include the index or column labels.

Also note: a NumPy array has a single dtype for the entire array. When df.to_numpy() is called on a DataFrame holding multiple dtypes, pandas will find the NumPy dtype that can hold all of them. This may end up being object, which requires casting every value to a Python object and can be time- and memory-consuming.

In [19]:
df.to_numpy()

array([[-1.31800127,  0.86140999,  0.13537903,  1.10136786],
       [-0.08499354,  0.20722719,  1.97645151, -1.55915387],
       [-0.60725993,  0.34340345, -0.33254175, -0.1271176 ],
       [-0.98926636,  0.73501924, -1.35198412,  0.77328311],
       [-0.02713376, -1.48987397,  0.55835843, -0.97525424],
       [-0.57914902,  0.52803933,  0.54847047, -1.48874029]])

In [20]:
# Quick statistical summary of each column: count, mean, std, min,
# quartiles (25%, 50%, 75%) and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.600967,0.197538,0.255689,-0.379269
std,0.502184,0.861112,1.103037,1.145644
min,-1.318001,-1.489874,-1.351984,-1.559154
25%,-0.893765,0.241271,-0.215562,-1.360369
50%,-0.593204,0.435721,0.341925,-0.551186
75%,-0.208532,0.683274,0.555886,0.548183
max,-0.027134,0.86141,1.976452,1.101368


In [21]:
# Transpose: the column labels become the index and the dates become columns.
df.transpose()

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-1.318001,-0.084994,-0.60726,-0.989266,-0.027134,-0.579149
B,0.86141,0.207227,0.343403,0.735019,-1.489874,0.528039
C,0.135379,1.976452,-0.332542,-1.351984,0.558358,0.54847
D,1.101368,-1.559154,-0.127118,0.773283,-0.975254,-1.48874


In [22]:
# Sort by column label in descending order (D, C, B, A); row order is unchanged.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,1.101368,0.135379,0.86141,-1.318001
2013-01-02,-1.559154,1.976452,0.207227,-0.084994
2013-01-03,-0.127118,-0.332542,0.343403,-0.60726
2013-01-04,0.773283,-1.351984,0.735019,-0.989266
2013-01-05,-0.975254,0.558358,-1.489874,-0.027134
2013-01-06,-1.48874,0.54847,0.528039,-0.579149


In [24]:
# Reorder the rows by the values in column 'B', smallest first.
df.sort_values('B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-05,-0.027134,-1.489874,0.558358,-0.975254
2013-01-02,-0.084994,0.207227,1.976452,-1.559154
2013-01-03,-0.60726,0.343403,-0.332542,-0.127118
2013-01-06,-0.579149,0.528039,0.54847,-1.48874
2013-01-04,-0.989266,0.735019,-1.351984,0.773283
2013-01-01,-1.318001,0.86141,0.135379,1.101368
