# 10min 学会 Pandas
> 引用链接：https://pandas.pydata.org/docs/user_guide/10min.html

## 引入环境

In [2]:
import numpy as np
import pandas as pd

## 创建对象

In [6]:
# Build a Series from a plain list; the np.nan entry shows up as a missing
# value (NaN) and pandas assigns the default 0..5 integer index.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


通过 `date_range()` 生成的日期索引和 NumPy 数组来创建一个 DataFrame

In [7]:
# Six consecutive daily timestamps starting at 2013-01-01; they serve as the
# row index of the DataFrame built in the next cell.
dates = pd.date_range(start="20130101", periods=6)
print(dates)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [10]:
# `index` supplies the row labels and `columns` supplies the column labels.
# The values are drawn from a seeded generator so that Restart & Run All
# rebuilds exactly the same 6x4 frame every time; output saved from an
# earlier, unseeded run will show different numbers until the cell is re-run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))
print(df)

                   A         B         C         D
2013-01-01  1.232556 -0.381929 -0.257304 -0.773841
2013-01-02 -0.324246 -1.079822  0.442704 -1.250441
2013-01-03  1.357608 -1.628856  0.336763  0.273371
2013-01-04  0.581455 -0.313582  0.213744  0.145471
2013-01-05  0.370641  1.180874 -0.802192  1.202442
2013-01-06 -0.882789 -1.561384  0.130778  1.283252


通过对象字典来创建DataFrame

In [20]:
# Column specification: a scalar entry is broadcast to every row, while an
# array-like entry is laid out in order, one element per row.
df2_columns = {
    "A": 1.0,
    "B": pd.Timestamp("20130102"),
    "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
    "D": np.array([3, 3, 3, 3], dtype="int32"),
    "E": pd.Categorical(["test", "train", "test", "train"]),
    "F": "foo",
}
df2 = pd.DataFrame(df2_columns)

# Show the frame itself, then the dtype pandas assigned to each column.
print(df2)
print(df2.dtypes)

     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object


## 查看数据

In [26]:
# Peek at both ends of the frame: the first two rows, then the last three.
# sep='\n' puts each label on its own line directly above its rows.
print('前2个数据：', df.head(2), sep='\n')
print('后3个数据：', df.tail(3), sep='\n')

前2个数据：
                   A         B         C         D
2013-01-01  1.232556 -0.381929 -0.257304 -0.773841
2013-01-02 -0.324246 -1.079822  0.442704 -1.250441
后3个数据：
                   A         B         C         D
2013-01-04  0.581455 -0.313582  0.213744  0.145471
2013-01-05  0.370641  1.180874 -0.802192  1.202442
2013-01-06 -0.882789 -1.561384  0.130778  1.283252


In [28]:
# Row labels are exposed as df.index and column labels as df.columns.
# sep='\n' puts each label on its own line directly above the object it names.
print('index: ', df.index, sep='\n')
print('column: ', df.columns, sep='\n')

index: 
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
column: 
Index(['A', 'B', 'C', 'D'], dtype='object')


In [30]:
# Convert the DataFrame to a NumPy array and print both forms for comparison.
# Note: print's default separator adds one space after the label's trailing
# newline, so the first line of each printed object is shifted by one column.
df_values = df.to_numpy()
print('df(DataFrame): \n', df)
print('df(NumPy): \n', df_values)

df(DataFrame): 
                    A         B         C         D
2013-01-01  1.232556 -0.381929 -0.257304 -0.773841
2013-01-02 -0.324246 -1.079822  0.442704 -1.250441
2013-01-03  1.357608 -1.628856  0.336763  0.273371
2013-01-04  0.581455 -0.313582  0.213744  0.145471
2013-01-05  0.370641  1.180874 -0.802192  1.202442
2013-01-06 -0.882789 -1.561384  0.130778  1.283252
df(NumPy): 
 [[ 1.23255574 -0.38192864 -0.25730368 -0.77384121]
 [-0.3242464  -1.07982175  0.44270441 -1.25044097]
 [ 1.35760777 -1.62885618  0.33676335  0.2733714 ]
 [ 0.58145512 -0.31358219  0.21374409  0.14547117]
 [ 0.37064094  1.18087365 -0.8021915   1.20244166]
 [-0.88278902 -1.5613839   0.13077791  1.28325245]]
