In [5]:
import numpy as np

In [6]:
import pandas as pd


Creating a Series by passing a list of values, letting pandas create a default integer index:

In [7]:
# Six values, one of them missing (NaN); no index is given, so pandas
# assigns the default integer index.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)

Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns:

In [8]:
# Six consecutive calendar days starting on 2013-01-01; used below as the
# row index of df.
dates = pd.date_range(start='20130101', periods=6)

In [9]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Draw the 6x4 block of standard-normal samples from a seeded generator so
# the frame is identical on every Restart & Run All. Outputs captured from an
# earlier unseeded run will show different numbers until the notebook is re-run.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))

In [11]:
df

Unnamed: 0,A,B,C,D
2013-01-01,1.002889,-1.030706,1.098217,-0.372401
2013-01-02,-2.215236,-2.153543,-0.359634,0.579446
2013-01-03,-0.7868,-0.763984,0.083729,0.223647
2013-01-04,-0.15363,-0.698266,0.220314,-0.444569
2013-01-05,-0.793402,1.373858,-0.790929,0.646331
2013-01-06,-2.64058,1.48753,0.061846,0.733679


Creating a DataFrame by passing a dict of objects that can be converted to series-like objects:

In [50]:
# Build a frame from a dict whose values have different shapes and dtypes.
# The scalar entries ('A', 'B', 'F') are repeated for every row, while the
# array-like entries ('C', 'D', 'E') supply the four rows; each column keeps
# its own dtype.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})

In [51]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [52]:
# df2's columns have different dtypes, so the NumPy representation falls back
# to a single object-dtype array (see dtype=object in the output).
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

Here is how to view the top and bottom rows of the frame:

In [42]:
# With no argument, head() shows the first five rows.
df.head()



Unnamed: 0,A,B,C,D
2013-01-01,1.002889,-1.030706,1.098217,-0.372401
2013-01-02,-2.215236,-2.153543,-0.359634,0.579446
2013-01-03,-0.7868,-0.763984,0.083729,0.223647
2013-01-04,-0.15363,-0.698266,0.220314,-0.444569
2013-01-05,-0.793402,1.373858,-0.790929,0.646331


In [16]:
# tail(3) shows the last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.15363,-0.698266,0.220314,-0.444569
2013-01-05,-0.793402,1.373858,-0.790929,0.646331
2013-01-06,-2.64058,1.48753,0.061846,0.733679


DataFrame.to_numpy() gives a NumPy representation of the underlying data. 

Note that this can be an expensive operation when your DataFrame has columns with different data types, which comes down to a fundamental difference between pandas and NumPy: 
NumPy arrays have one dtype for the entire array, while pandas DataFrames have one dtype per column. 

When you call DataFrame.to_numpy(), pandas will find the NumPy dtype that can hold all of the dtypes in the DataFrame. 

This may end up being object, which requires casting every value to a Python object.

For df, our DataFrame of all floating-point values, DataFrame.to_numpy() is fast and doesn’t require copying data.

In [43]:
df.to_numpy()

array([[ 1.0028892 , -1.03070619,  1.098217  , -0.37240124],
       [-2.21523586, -2.15354336, -0.35963431,  0.57944573],
       [-0.78679972, -0.76398363,  0.08372936,  0.22364692],
       [-0.15362961, -0.69826551,  0.22031436, -0.44456885],
       [-0.79340217,  1.3738577 , -0.79092945,  0.64633104],
       [-2.64057978,  1.48753029,  0.06184642,  0.73367852]])

In [44]:
# NOTE(review): the output captured below contains the string values
# 'A4'..'D7', which belong to the df2 that is rebound further down in the
# notebook, not to the mixed-dtype df2 built above. This suggests the cell was
# executed out of order; re-run the notebook top to bottom to refresh it.
df2.to_numpy()

array([['A4', 'B4', 'C4', 'D4'],
       ['A5', 'B5', 'C5', 'D5'],
       ['A6', 'B6', 'C6', 'D6'],
       ['A7', 'B7', 'C7', 'D7']], dtype=object)

In [19]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.931126,-0.297518,0.052257,0.227689
std,1.338349,1.438087,0.633084,0.522891
min,-2.64058,-2.153543,-0.790929,-0.444569
25%,-1.859777,-0.964026,-0.254264,-0.223389
50%,-0.790101,-0.731125,0.072788,0.401546
75%,-0.311922,0.855827,0.186168,0.62961
max,1.002889,1.48753,1.098217,0.733679


Transposing your data:

In [20]:
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,1.002889,-2.215236,-0.7868,-0.15363,-0.793402,-2.64058
B,-1.030706,-2.153543,-0.763984,-0.698266,1.373858,1.48753
C,1.098217,-0.359634,0.083729,0.220314,-0.790929,0.061846
D,-0.372401,0.579446,0.223647,-0.444569,0.646331,0.733679


Sorting by an axis:

In [21]:
# axis=1 sorts by column label; ascending=False gives the order D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.372401,1.098217,-1.030706,1.002889
2013-01-02,0.579446,-0.359634,-2.153543,-2.215236
2013-01-03,0.223647,0.083729,-0.763984,-0.7868
2013-01-04,-0.444569,0.220314,-0.698266,-0.15363
2013-01-05,0.646331,-0.790929,1.373858,-0.793402
2013-01-06,0.733679,0.061846,1.48753,-2.64058


Sorting by values:

In [22]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-02,-2.215236,-2.153543,-0.359634,0.579446
2013-01-01,1.002889,-1.030706,1.098217,-0.372401
2013-01-03,-0.7868,-0.763984,0.083729,0.223647
2013-01-04,-0.15363,-0.698266,0.220314,-0.444569
2013-01-05,-0.793402,1.373858,-0.790929,0.646331
2013-01-06,-2.64058,1.48753,0.061846,0.733679


## Getting
Selecting a single column, which yields a Series, equivalent to df.A:

In [23]:
df['A']

2013-01-01    1.002889
2013-01-02   -2.215236
2013-01-03   -0.786800
2013-01-04   -0.153630
2013-01-05   -0.793402
2013-01-06   -2.640580
Freq: D, Name: A, dtype: float64

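Selecting via [] with a slice selects rows; for example, df[0:3] returns the first three rows. The complete frame is displayed below for reference: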

In [24]:
df

Unnamed: 0,A,B,C,D
2013-01-01,1.002889,-1.030706,1.098217,-0.372401
2013-01-02,-2.215236,-2.153543,-0.359634,0.579446
2013-01-03,-0.7868,-0.763984,0.083729,0.223647
2013-01-04,-0.15363,-0.698266,0.220314,-0.444569
2013-01-05,-0.793402,1.373858,-0.790929,0.646331
2013-01-06,-2.64058,1.48753,0.061846,0.733679


In [25]:
# First of three frames that share the columns A-D and carry consecutive,
# non-overlapping row labels (0-3 here); they are concatenated further down.
df1 = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    },
    index=[0, 1, 2, 3],
)

In [26]:
# Second frame for the concat example: same columns A-D, row labels 4-7.
# NOTE(review): this rebinds df2, which earlier in the notebook refers to the
# mixed-dtype frame; any cell that reads df2 therefore depends on execution
# order. Consider giving one of the two frames a distinct name.
df2 = pd.DataFrame(
    {
        'A': ['A4', 'A5', 'A6', 'A7'],
        'B': ['B4', 'B5', 'B6', 'B7'],
        'C': ['C4', 'C5', 'C6', 'C7'],
        'D': ['D4', 'D5', 'D6', 'D7'],
    },
    index=[4, 5, 6, 7],
)

In [27]:
# Third frame for the concat example: same columns A-D, row labels 8-11.
df3 = pd.DataFrame(
    {
        'A': ['A8', 'A9', 'A10', 'A11'],
        'B': ['B8', 'B9', 'B10', 'B11'],
        'C': ['C8', 'C9', 'C10', 'C11'],
        'D': ['D8', 'D9', 'D10', 'D11'],
    },
    index=[8, 9, 10, 11],
)

In [28]:
frames = [df1, df2, df3]

In [29]:
frames

[    A   B   C   D
 0  A0  B0  C0  D0
 1  A1  B1  C1  D1
 2  A2  B2  C2  D2
 3  A3  B3  C3  D3,     A   B   C   D
 4  A4  B4  C4  D4
 5  A5  B5  C5  D5
 6  A6  B6  C6  D6
 7  A7  B7  C7  D7,       A    B    C    D
 8    A8   B8   C8   D8
 9    A9   B9   C9   D9
 10  A10  B10  C10  D10
 11  A11  B11  C11  D11]

In [30]:
# Stack the frames in the list on top of one another; each row keeps the
# label it had in its source frame.
result = pd.concat(frames)

In [31]:
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [41]:
# Positional slice: every row except the first.
# NOTE(review): the output captured below carries the x/y/z outer index level,
# which only exists once result is rebuilt with keys=[...] in a later cell.
# This suggests the cell was executed out of order; re-run top to bottom.
result[1:]

Unnamed: 0,Unnamed: 1,A,B,C,D
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3
y,4,A4,B4,C4,D4
y,5,A5,B5,C5,D5
y,6,A6,B6,C6,D6
y,7,A7,B7,C7,D7
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9
z,10,A10,B10,C10,D10


In [36]:
# keys= adds an outer index level that tags each row with its source frame
# ('x' for the first frame in the list, 'y' for the second, 'z' for the third).
# This rebinds result, replacing the plain concatenation built earlier.
result = pd.concat(frames, keys=['x', 'y', 'z'])

In [34]:
result

Unnamed: 0,Unnamed: 1,A,B,C,D
x,0,A0,B0,C0,D0
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3
y,4,A4,B4,C4,D4
y,5,A5,B5,C5,D5
y,6,A6,B6,C6,D6
y,7,A7,B7,C7,D7
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9
