# 10 Minutes to pandas
- Reference: https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html

## Import dependencies

In [115]:
import numpy as np
import pandas as pd

## Object Creation

### Creating a Series by passing a list of values, letting pandas create a default integer index



In [116]:
# A single string among the numbers forces the whole Series to the generic
# `object` dtype; NaN marks the missing entries.
s = pd.Series(data=[1, 3, 54, np.nan, 6, 8, np.nan, 'hello'])
s

0        1
1        3
2       54
3      NaN
4        6
5        8
6      NaN
7    hello
dtype: object

In [117]:
# With only numbers and NaN, pandas stores the Series as float64
# (NaN is a float, so the integers are upcast).
v = pd.Series(data=[1, 3, 54, np.nan, 6, 8, np.nan])
v

0     1.0
1     3.0
2    54.0
3     NaN
4     6.0
5     8.0
6     NaN
dtype: float64

### Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns

In [118]:
# Six consecutive calendar days starting on 2019-05-01; used below as the
# row index of the demo DataFrame.
dates = pd.date_range(start='20190501', periods=6, freq='D')
dates

DatetimeIndex(['2019-05-01', '2019-05-02', '2019-05-03', '2019-05-04',
               '2019-05-05', '2019-05-06'],
              dtype='datetime64[ns]', freq='D')

In [119]:
# 6x5 frame of uniform random floats in [0, 1): one row per entry of `dates`,
# one column per letter A-E.
# A dedicated, seeded RandomState makes the values identical on every
# Restart & Run All, and leaves NumPy's global random state untouched.
# NOTE: re-run the notebook after changing the seed so stored outputs match.
df = pd.DataFrame(
    np.random.RandomState(2019).rand(6, 5),
    index=dates,
    columns=list('ABCDE'),
)
df

Unnamed: 0,A,B,C,D,E
2019-05-01,0.315118,0.966615,0.992685,0.214536,0.292566
2019-05-02,0.584462,0.52041,0.493577,0.672422,0.624682
2019-05-03,0.921174,0.412787,0.005845,0.025251,0.471946
2019-05-04,0.163483,0.515057,0.312956,0.396357,0.997597
2019-05-05,0.92028,0.135982,0.436941,0.741011,0.486268
2019-05-06,0.959126,0.941923,0.623312,0.348573,0.979446


### Creating a DataFrame by passing a dict of objects that can be converted to a series-like structure.

In [120]:
# Build a frame from a dict: every key becomes a column. The scalar entries
# (A, B, F) are broadcast over the four rows defined by the Series, array and
# Categorical entries (C, D, E).
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20190501'),
        'C': pd.Series(2.2, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(['test', 'train', 'test', 'train2']),
        'F': 'foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2019-05-01,2.2,3,test,foo
1,1.0,2019-05-01,2.2,3,train,foo
2,1.0,2019-05-01,2.2,3,test,foo
3,1.0,2019-05-01,2.2,3,train2,foo


**The columns of the resulting DataFrame have different dtypes.**

In [121]:
# Each column keeps its own dtype; .dtypes lists them column by column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing Data

**Here is how to view the top and bottom rows of the frame:**

In [122]:
# First three rows of the frame.
df.head(3)

Unnamed: 0,A,B,C,D,E
2019-05-01,0.315118,0.966615,0.992685,0.214536,0.292566
2019-05-02,0.584462,0.52041,0.493577,0.672422,0.624682
2019-05-03,0.921174,0.412787,0.005845,0.025251,0.471946


In [123]:
# Last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D,E
2019-05-04,0.163483,0.515057,0.312956,0.396357,0.997597
2019-05-05,0.92028,0.135982,0.436941,0.741011,0.486268
2019-05-06,0.959126,0.941923,0.623312,0.348573,0.979446


**Display the index and columns:**



In [124]:
# Row labels: the DatetimeIndex the frame was built with.
df.index

DatetimeIndex(['2019-05-01', '2019-05-02', '2019-05-03', '2019-05-04',
               '2019-05-05', '2019-05-06'],
              dtype='datetime64[ns]', freq='D')

In [125]:
# Column labels of the frame.
df.columns

Index([u'A', u'B', u'C', u'D', u'E'], dtype='object')

**DataFrame.to_numpy() gives a NumPy representation of the underlying data**

In [126]:
# Every column of df is a float, so the result is a plain 2-D float array.
df.to_numpy()

array([[0.31511757, 0.96661455, 0.99268472, 0.21453557, 0.29256559],
       [0.58446154, 0.52041028, 0.49357726, 0.67242175, 0.62468238],
       [0.9211737 , 0.4127874 , 0.00584512, 0.02525107, 0.47194616],
       [0.16348269, 0.51505713, 0.31295575, 0.39635691, 0.99759666],
       [0.92028042, 0.13598207, 0.43694137, 0.74101107, 0.48626843],
       [0.95912614, 0.94192322, 0.6233117 , 0.34857317, 0.97944608]])

In [127]:
# df2 mixes column dtypes, so the array falls back to dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2019-05-01 00:00:00'), 2.200000047683716, 3,
        'test', 'foo'],
       [1.0, Timestamp('2019-05-01 00:00:00'), 2.200000047683716, 3,
        'train', 'foo'],
       [1.0, Timestamp('2019-05-01 00:00:00'), 2.200000047683716, 3,
        'test', 'foo'],
       [1.0, Timestamp('2019-05-01 00:00:00'), 2.200000047683716, 3,
        'train2', 'foo']], dtype=object)

**Transposing your data:**


In [128]:
# Transpose: columns A-E become the rows and the dates become the columns.
df.T

Unnamed: 0,2019-05-01 00:00:00,2019-05-02 00:00:00,2019-05-03 00:00:00,2019-05-04 00:00:00,2019-05-05 00:00:00,2019-05-06 00:00:00
A,0.315118,0.584462,0.921174,0.163483,0.92028,0.959126
B,0.966615,0.52041,0.412787,0.515057,0.135982,0.941923
C,0.992685,0.493577,0.005845,0.312956,0.436941,0.623312
D,0.214536,0.672422,0.025251,0.396357,0.741011,0.348573
E,0.292566,0.624682,0.471946,0.997597,0.486268,0.979446


**Sorting by an axis:**


In [129]:
# axis=0 sorts by the row labels (the dates); ascending=False puts the
# latest date first.
df.sort_index(axis=0,ascending=False)

Unnamed: 0,A,B,C,D,E
2019-05-06,0.959126,0.941923,0.623312,0.348573,0.979446
2019-05-05,0.92028,0.135982,0.436941,0.741011,0.486268
2019-05-04,0.163483,0.515057,0.312956,0.396357,0.997597
2019-05-03,0.921174,0.412787,0.005845,0.025251,0.471946
2019-05-02,0.584462,0.52041,0.493577,0.672422,0.624682
2019-05-01,0.315118,0.966615,0.992685,0.214536,0.292566


**Sorting by values:**

In [114]:
# Reorder the rows by the values in column C (ascending by default).
df.sort_values(by='C')

Unnamed: 0,A,B,C,D,E
2019-05-03,0.921174,0.412787,0.005845,0.025251,0.471946
2019-05-04,0.163483,0.515057,0.312956,0.396357,0.997597
2019-05-05,0.92028,0.135982,0.436941,0.741011,0.486268
2019-05-02,0.584462,0.52041,0.493577,0.672422,0.624682
2019-05-06,0.959126,0.941923,0.623312,0.348573,0.979446
2019-05-01,0.315118,0.966615,0.992685,0.214536,0.292566
