In [1]:
import numpy as np
import pandas as pd

In [2]:
# pandas offers three data objects: Series, DataFrame and Panel.
# --- Series ---
# A bare NumPy array is addressed by position only, so its indices carry no
# obvious meaning of their own.
g = np.array([
    27466.15, 24899.3, 19610.9, 19492.4,
    17885.39, 17558.76, 15475.09, 12170.2,
])

# Wrapping the array in a Series with a label index gives every value a
# meaningful key.
gdp = pd.Series(
    data=g,
    index=[
        'shanghai', 'beijing', 'guangzhou', 'shenzhen',
        'tianjin', 'chongqing', 'suzhou', 'chengdu',
    ],
)
gdp

shanghai     27466.15
beijing      24899.30
guangzhou    19610.90
shenzhen     19492.40
tianjin      17885.39
chongqing    17558.76
suzhou       15475.09
chengdu      12170.20
dtype: float64

In [3]:
# index is the label index; when it is omitted the labels default to
# integers counting up from 0.
g2 = pd.Series(data=g)
g2

0    27466.15
1    24899.30
2    19610.90
3    19492.40
4    17885.39
5    17558.76
6    15475.09
7    12170.20
dtype: float64

In [4]:
# A plain Python list works as the data; the labels default to 0, 1, 2.
pd.Series([100, 200, 300])

0    100
1    200
2    300
dtype: int64

In [5]:
# A scalar passed as data is repeated for every label; a number behaves the
# same way as a string does.
pd.Series(data=100, index=['a', 'b', 'c'])

a    100
b    100
c    100
dtype: int64

In [6]:
# A list of integers passed as index becomes the label index and takes the
# place of the default positional one.
d = pd.Series(data='faker', index=[3, 4, 5])
d

3    faker
4    faker
5    faker
dtype: object

In [7]:
# Look up the element of d whose label is 3 (d was built with the labels 3, 4, 5).
d[3]

'faker'

In [8]:
# A dict is accepted as data as well; its keys serve as the label index.
gdp2 = pd.Series(data={
    'wuhan': 11912.6,
    'hangzhou': 11050.5,
    'nanjing': 10503,
})
gdp2

wuhan       11912.6
hangzhou    11050.5
nanjing     10503.0
dtype: float64

In [9]:
# Supplying index together with a dict picks the entries in the order listed
# in index, which rearranges the labels.
gdp3 = pd.Series(
    data={'wuhan': 11912.6, 'hangzhou': 11050.5, 'nanjing': 10503},
    index=['nanjing', 'wuhan', 'hangzhou'],
)
gdp3

nanjing     10503.0
wuhan       11912.6
hangzhou    11050.5
dtype: float64

In [10]:
# The index and values attributes of a Series
# index returns the label index; values returns the elements
gdp.index

Index(['shanghai', 'beijing', 'guangzhou', 'shenzhen', 'tianjin', 'chongqing',
       'suzhou', 'chengdu'],
      dtype='object')

In [11]:
# values returns the elements of the Series without their labels.
gdp.values

array([27466.15, 24899.3 , 19610.9 , 19492.4 , 17885.39, 17558.76,
       15475.09, 12170.2 ])

In [12]:
# Labels do not have to be fixed at construction time: assigning a new
# sequence to .index replaces them afterwards.
# NOTE(review): 'TIANJING' is presumably a misspelling of 'TIANJIN' (the
# original lower-case label is 'tianjin') — confirm nothing looks this label
# up before correcting it.
gdp.index = [
    'SHANGHAI', 'BEIJING', 'GUANGZHOU', 'SHENZHEN',
    'TIANJING', 'CHONGQING', 'SUZHOU', 'CHENGDU',
]
gdp

SHANGHAI     27466.15
BEIJING      24899.30
GUANGZHOU    19610.90
SHENZHEN     19492.40
TIANJING     17885.39
CHONGQING    17558.76
SUZHOU       15475.09
CHENGDU      12170.20
dtype: float64

In [13]:
gdp.name='GDP'  # the name attribute describes the object
gdp

SHANGHAI     27466.15
BEIJING      24899.30
GUANGZHOU    19610.90
SHENZHEN     19492.40
TIANJING     17885.39
CHONGQING    17558.76
SUZHOU       15475.09
CHENGDU      12170.20
Name: GDP, dtype: float64

In [14]:
# The label index can carry a name of its own.
gdp.index.name='City Name'
gdp

City Name
SHANGHAI     27466.15
BEIJING      24899.30
GUANGZHOU    19610.90
SHENZHEN     19492.40
TIANJING     17885.39
CHONGQING    17558.76
SUZHOU       15475.09
CHENGDU      12170.20
Name: GDP, dtype: float64

In [16]:
# --- DataFrame ---
# A nested list becomes a table: each inner list is one row, and both axes
# receive default integer labels.
gp = pd.DataFrame([
    [21313, 34314],
    [4543224, 98989],
    [222223, 5654555],
])
gp

Unnamed: 0,0,1
0,21313,34314
1,4543224,98989
2,222223,5654555


In [17]:
# Replace the default integer labels on both axes: column labels first,
# then row labels (the two assignments are independent of each other).
gp.columns = ['GDP', 'Populations']
gp.index = ['YN', 'SC', 'BJ']
gp

Unnamed: 0,GDP,Populations
YN,21313,34314
SC,4543224,98989
BJ,222223,5654555


In [18]:
# Each axis can be given a name as well: one for the columns, one for the rows.
gp.columns.name = 'Items'
gp.index.name = 'Province'
gp

Items,GDP,Populations
Province,Unnamed: 1_level_1,Unnamed: 2_level_1
YN,21313,34314
SC,4543224,98989
BJ,222223,5654555


In [19]:
# Build a DataFrame from a dict: each key becomes a column, and index
# supplies the row labels.
pd.DataFrame(
    data={
        'city': ['beijing', 'beijing', 'hubei', 'shanghai'],
        'mark': [100, 89, 75, 80],
    },
    index=['PKU', 'tsinghua', 'Wuhan', 'Fudan'],
)

Unnamed: 0,city,mark
PKU,beijing,100
tsinghua,beijing,89
Wuhan,hubei,75
Fudan,shanghai,80


In [20]:
# Build from an array.
# The array holds a single dtype, so mixing str and int stores every entry as
# a string (the numbers appear quoted in the output).
u = np.array([
    ('beijing', 100),
    ('beijing', 89),
    ('hubei', 75),
    ('shanghai', 80),
])
u

array([['beijing', '100'],
       ['beijing', '89'],
       ['hubei', '75'],
       ['shanghai', '80']], dtype='<U8')

In [21]:
# Wrap the array in a DataFrame, labelling the rows and the columns explicitly.
pd.DataFrame(
    data=u,
    index=['PKU', 'Tsinghua', 'WHU', 'Fudan'],
    columns=['city', 'marks'],
)

Unnamed: 0,city,marks
PKU,beijing,100
Tsinghua,beijing,89
WHU,hubei,75
Fudan,shanghai,80


In [22]:
# Converting data
# Every value in this dict is array-like (a list), so from_dict turns each
# key into a column.
dict_gdp = {
    'GDP': [27466, 24899.30, 19610.9, 19526.3],
    'Population': [2419.8, 2172.9, 1350.1, 1138.3],
}
pd.DataFrame.from_dict(dict_gdp)

Unnamed: 0,GDP,Population
0,27466.0,2419.8
1,24899.3,2172.9
2,19610.9,1350.1
3,19526.3,1138.3


In [23]:
# orient='index' turns the dict keys into row labels instead of column labels.
pd.DataFrame.from_dict(data=dict_gdp, orient='index')

Unnamed: 0,0,1,2,3
GDP,27466.0,24899.3,19610.9,19526.3
Population,2419.8,2172.9,1350.1,1138.3


In [24]:
# In this dict every value is itself a dict.
# NOTE(review): SHANGHAI's Population of 419.7 looks inconsistent with the
# 2419.8 listed first under 'Population' in dict_gdp — possibly a dropped
# digit; confirm against the data source.
dict2_gdp = {
    'GDP': {
        'SHANGHAI': 27466.2,
        'BEIJING': 24899.3,
        'GUANGZHOU': 19610.9,
        'SHENZHEN': 19492.6,
    },
    'Population': {
        'SHANGHAI': 419.7,
        'BEIJING': 2172.9,
        'GUANGZHOU': 1350.11,
        'SHENZHEN': 1137.8,
    },
}
dict2_gdp

{'GDP': {'SHANGHAI': 27466.2,
  'BEIJING': 24899.3,
  'GUANGZHOU': 19610.9,
  'SHENZHEN': 19492.6},
 'Population': {'SHANGHAI': 419.7,
  'BEIJING': 2172.9,
  'GUANGZHOU': 1350.11,
  'SHENZHEN': 1137.8}}

In [25]:
# from_dict on a dict of dicts: the outer keys become the columns and the
# inner keys become the row labels. Note that this re-binds gp.
gp = pd.DataFrame.from_dict(data=dict2_gdp)
gp

Unnamed: 0,GDP,Population
BEIJING,24899.3,2172.9
GUANGZHOU,19610.9,1350.11
SHANGHAI,27466.2,419.7
SHENZHEN,19492.6,1137.8


In [26]:
# Row-oriented conversion of dict_gdp: the keys become the row labels.
# NOTE(review): this repeats an earlier cell verbatim; dict2_gdp may have
# been the intended argument here — confirm.
pd.DataFrame.from_dict(data=dict_gdp, orient='index')

Unnamed: 0,0,1,2,3
GDP,27466.0,24899.3,19610.9,19526.3
Population,2419.8,2172.9,1350.1,1138.3


In [27]:
# items() exposes dict_gdp as a view of (key, value) pairs.
items_gdp=dict_gdp.items()
items_gdp

dict_items([('GDP', [27466, 24899.3, 19610.9, 19526.3]), ('Population', [2419.8, 2172.9, 1350.1, 1138.3])])

In [28]:
# Build a row-oriented frame from the (key, value) pairs in items_gdp: each
# key becomes a row label and columns names the positions of each value list.
# DataFrame.from_items is deprecated (and removed in pandas 1.0); the
# documented replacement is DataFrame.from_dict(dict(items), ...).
# dict() keeps the pairs in their original order on Python 3.7+, so the rows
# come out in the same order as before.
pd.DataFrame.from_dict(
    dict(items_gdp),
    orient='index',
    columns=['shanghai', 'beijing', 'guangzhou', 'shenzhen'],
)

  """Entry point for launching an IPython kernel.


Unnamed: 0,shanghai,beijing,guangzhou,shenzhen
GDP,27466.0,24899.3,19610.9,19526.3
Population,2419.8,2172.9,1350.1,1138.3


In [29]:
# Display the current gp (the frame built from dict2_gdp).
gp

Unnamed: 0,GDP,Population
BEIJING,24899.3,2172.9
GUANGZHOU,19610.9,1350.11
SHANGHAI,27466.2,419.7
SHENZHEN,19492.6,1137.8


In [30]:
# Write the GDP and Population columns of gp to gp.csv without a header row;
# the row labels of gp are still written as the first field of every line.
# index_label only names the index column inside the header row, so it is
# never written when header=False. The list of city names previously passed
# there had no effect on the file and has been removed to avoid suggesting
# that it relabels the rows.
gp.to_csv('gp.csv', columns=['GDP', 'Population'], header=False)

In [32]:
# Panel: the three-dimensional data structure object
# NOTE(review): the warning printed below this cell reports Panel as
# deprecated and recommends a MultiIndex DataFrame or xarray instead.
# No random seed is set in the visible cells, so the values differ per run.
a=np.random.rand(3,4,5)
p=pd.Panel(a)
p

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  exec(code_obj, self.user_global_ns, self.user_ns)


<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 4 (major_axis) x 5 (minor_axis)
Items axis: 0 to 2
Major_axis axis: 0 to 3
Minor_axis axis: 0 to 4

In [34]:
# A list of dicts yields one row per dict; the dict keys become the columns
# and index supplies the row labels.
cdf = pd.DataFrame(
    data=[
        {'Average Temperature': 11.8, 'Altitude': 37},
        {'Average Temperature': 22.8, 'Altitude': 39},
    ],
    index=['Beijing', 'HongKong'],
)
cdf

Unnamed: 0,Altitude,Average Temperature
Beijing,37,11.8
HongKong,39,22.8


In [38]:
# Same construction for three US cities: one dict per row, keys as columns,
# index as row labels.
udf = pd.DataFrame(
    data=[
        {'Average Temperature': 9.8, 'Altitude': 35},
        {'Average Temperature': 12.8, 'Altitude': 3},
        {'Average Temperature': 13.9, 'Altitude': 16},
    ],
    index=['Chicago', 'New York', 'San Fran'],
)
udf

Unnamed: 0,Altitude,Average Temperature
Chicago,35,9.8
New York,3,12.8
San Fran,16,13.9


In [39]:
# Combine the two frames into one Panel; the dict keys ('China', 'USA')
# become the labels of the items axis.
# NOTE(review): the warning printed below this cell reports Panel as deprecated.
ncaa=pd.Panel({'China':cdf,'USA':udf})
ncaa

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  exec(code_obj, self.user_global_ns, self.user_ns)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  minor_axis=minor_axis, copy=copy, dtype=dtype)


<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 5 (major_axis) x 2 (minor_axis)
Items axis: China to USA
Major_axis axis: Beijing to San Fran
Minor_axis axis: Altitude to Average Temperature

In [40]:
# Rebuild p from the array a, this time labelling all three axes
# (3 items, 4 major-axis labels, 5 minor-axis labels).
p=pd.Panel(data=a,items=['itema','itemb','itemc'],major_axis=['a','b','c','d'],minor_axis=['one','two','three','four','five'])
p

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  exec(code_obj, self.user_global_ns, self.user_ns)


<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 4 (major_axis) x 5 (minor_axis)
Items axis: itema to itemc
Major_axis axis: a to d
Minor_axis axis: one to five

In [41]:
# Indexing the Panel by an item label returns that item's two-dimensional
# slice (rows a-d, columns one-five).
p['itema']

Unnamed: 0,one,two,three,four,five
a,0.118954,0.664107,0.406437,0.283662,0.920923
b,0.233804,0.39698,0.218353,0.070871,0.003804
c,0.372199,0.627905,0.626309,0.398368,0.451102
d,0.529269,0.519716,0.888828,0.096722,0.152849


In [42]:
# The two-dimensional slice stored under the item label 'itemb'.
p['itemb']

Unnamed: 0,one,two,three,four,five
a,0.770008,0.891464,0.050106,0.858964,0.743703
b,0.373651,0.491186,0.121224,0.54241,0.518896
c,0.747169,0.480966,0.58766,0.637211,0.509398
d,0.126489,0.68981,0.848019,0.729179,0.828027


In [43]:
# Cross-section at major-axis label 'a': the rows are the minor-axis labels
# and the columns are the items.
p.major_xs('a')

Unnamed: 0,itema,itemb,itemc
one,0.118954,0.770008,0.558889
two,0.664107,0.891464,0.80303
three,0.406437,0.050106,0.031981
four,0.283662,0.858964,0.926703
five,0.920923,0.743703,0.782826


In [47]:
# Cross-section at minor-axis label 'one': the rows are the major-axis labels
# and the columns are the items.
p.minor_xs('one')

Unnamed: 0,itema,itemb,itemc
a,0.118954,0.770008,0.558889
b,0.233804,0.373651,0.175552
c,0.372199,0.747169,0.986586
d,0.529269,0.126489,0.596867
