### Pandas引入约定

In [1]:
from pandas import Series,DataFrame

In [2]:
import pandas as pd

## Series

In [3]:
import numpy as np

### 通过一维数组创建Series

In [4]:
arr=np.array([1,2,3,4])

In [5]:
series01 = Series(arr)

In [6]:
series01

0    1
1    2
2    3
3    4
dtype: int32

In [7]:
series01.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
series01.values

array([1, 2, 3, 4])

In [9]:
series01.dtype

dtype('int32')

In [10]:
series02=Series([34.5,56.78,45.67])

In [11]:
series02

0    34.50
1    56.78
2    45.67
dtype: float64

In [12]:
series02.index

RangeIndex(start=0, stop=3, step=1)

### 修改默认索引
    通过数组创建时，如果没有指定索引，默认会创建一个从0到N-1（N为数据的长度）的整数型索引,可以通过赋值的方式进行修改

In [13]:
series02.index=['product1','product2','product3']

In [14]:
series02

product1    34.50
product2    56.78
product3    45.67
dtype: float64

    通过数组创建Series时，可以通过index参数传入一个明确的标签索引

In [15]:
series03=Series([98,56,88,45],index=['语文','数学','英语','体育'])

In [16]:
series03

语文    98
数学    56
英语    88
体育    45
dtype: int64

In [17]:
series03.index

Index([u'语文', u'数学', u'英语', u'体育'], dtype='object')

In [18]:
series03.values

array([98, 56, 88, 45], dtype=int64)

### 通过字典方式创建Series
    Series可以被看做是一个定长的有序字典，是索引值到数据值的一个映射，可以直接通过字典来创建Series

In [19]:
a_dict={'201701':6798.98,'201702':34556.89,'201703':3748758.88}

In [20]:
series04= Series(a_dict)

In [21]:
series04

201701       6798.98
201702      34556.89
201703    3748758.88
dtype: float64

In [22]:
series04.index

Index([u'201701', u'201702', u'201703'], dtype='object')

### Series应用NumPy数组运算

In [23]:
series04['201701']

6798.9799999999996

In [24]:
series04[0]

6798.9799999999996

In [25]:
series04[series04>10000]

201702      34556.89
201703    3748758.88
dtype: float64

In [26]:
series04>10000

201701    False
201702     True
201703     True
dtype: bool

In [27]:
series04/100

201701       67.9898
201702      345.5689
201703    37487.5888
dtype: float64

In [28]:
series01

0    1
1    2
2    3
3    4
dtype: int32

In [29]:
np.exp([series01])

array([[  2.71828183,   7.3890561 ,  20.08553692,  54.59815003]])

### Series缺失值检测
     pandas中的isnull和notnull函数可用于缺失值检测,返回一个布尔类型的Series

In [30]:
scores = Series({"Tom":89,"John":88,"Merry":96,"Max":65})

In [31]:
scores

John     88
Max      65
Merry    96
Tom      89
dtype: int64

In [32]:
new_index=['Tom','Max','Joe',"John","Merry"]

In [34]:
scores.index=new_index

ValueError: Length mismatch: Expected axis has 4 elements, new values have 5 elements

In [35]:
scores=Series(scores,index=new_index)

In [36]:
scores

Tom      89.0
Max      65.0
Joe       NaN
John     88.0
Merry    96.0
dtype: float64

In [37]:
pd.isnull(scores)

Tom      False
Max      False
Joe       True
John     False
Merry    False
dtype: bool

In [38]:
pd.notnull(scores)

Tom       True
Max       True
Joe      False
John      True
Merry     True
dtype: bool

In [39]:
scores[pd.isnull(scores)]

Joe   NaN
dtype: float64

In [40]:
scores[pd.notnull(scores)]

Tom      89.0
Max      65.0
John     88.0
Merry    96.0
dtype: float64

### Series 自动对齐
    不同Series之间进行运算，会自动对齐不同索引的数据

In [41]:
product_num = Series([23,45,67,89],index=['p3','p1','p2','p5'])

In [43]:
product_num

p3    23
p1    45
p2    67
p5    89
dtype: int64

In [44]:
product_price_table=Series([9.98,2.34,4.56,5.67,8.78],index=['p1','p2','p3','p4','p5'])

In [45]:
product_price_table

p1    9.98
p2    2.34
p3    4.56
p4    5.67
p5    8.78
dtype: float64

In [46]:
product_sum=product_num*product_price_table

In [47]:
product_sum

p1    449.10
p2    156.78
p3    104.88
p4       NaN
p5    781.42
dtype: float64

### Series 及其索引的name属性
    Series对象本身及其索引都有一个name属性，可赋值设置

In [48]:
product_num.name='ProductNums'

In [49]:
product_num.index.name='ProductType'

In [50]:
product_num

ProductType
p3    23
p1    45
p2    67
p5    89
Name: ProductNums, dtype: int64

### DataFrame

#### 通过二维数组创建DataFrame

In [51]:
df01=DataFrame([['Tom','Merry',"John"],[76,98,100]])

In [52]:
df01

Unnamed: 0,0,1,2
0,Tom,Merry,John
1,76,98,100


In [53]:
df02=DataFrame([['Tom',76],['Merry',98],['John',100]])

In [54]:
df02

Unnamed: 0,0,1
0,Tom,76
1,Merry,98
2,John,100


In [55]:
arr=np.array([['Tom',76],['Merry',98],['John',100]])

In [56]:
df03=DataFrame(arr,columns=['name','score'])

In [57]:
df03

Unnamed: 0,name,score
0,Tom,76
1,Merry,98
2,John,100


In [58]:
df04=DataFrame(arr,index=['one','two','three'],columns=['name','score'])

In [59]:
df04

Unnamed: 0,name,score
one,Tom,76
two,Merry,98
three,John,100


#### 通过字典方式创建DataFrame

In [60]:
data={'apart':['1001','1002','1003','1001'],
     'profits':[567.87,987.87,873,498.87],
     'year':[2001,2001,2001,2000]}

In [61]:
df=DataFrame(data)

In [62]:
df

Unnamed: 0,apart,profits,year
0,1001,567.87,2001
1,1002,987.87,2001
2,1003,873.0,2001
3,1001,498.87,2000


In [74]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [75]:
df.columns

Index([u'apart', u'profits', u'year'], dtype='object')

In [76]:
df.values

array([['1001', 567.87, 2001L],
       ['1002', 987.87, 2001L],
       ['1003', 873.0, 2001L],
       ['1001', 498.87, 2000L]], dtype=object)

In [77]:
df=DataFrame(data,index=['one','two','three','four'])

In [78]:
df

Unnamed: 0,apart,profits,year
one,1001,567.87,2001
two,1002,987.87,2001
three,1003,873.0,2001
four,1001,498.87,2000


In [79]:
df.index

Index([u'one', u'two', u'three', u'four'], dtype='object')