# series
## series的创建

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.Series(np.arange(9))

#data：传入的数据，可以是ndarray、list等
#index：索引，必须是唯一的，且与数据的长度相等。如果没有传入索引参数，则默认会自动创建一个从0-N的整数索引。
#dtype：数据的类型

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
dtype: int32

In [3]:
pd.Series([1.2, 2.3, 4.5, 5.0], index=[1, 2, 3, 4])

1    1.2
2    2.3
3    4.5
4    5.0
dtype: float64

In [4]:
color_count = pd.Series({"red":10, "green":20, "blue":100})

In [5]:
color_count

red       10
green     20
blue     100
dtype: int64

## series的属性

In [6]:
color_count.index

Index(['red', 'green', 'blue'], dtype='object')

In [7]:
color_count.values

array([ 10,  20, 100], dtype=int64)

In [8]:
color_count[0]

10

In [9]:
color_count[1]

20

In [10]:
color_count.value_counts

<bound method IndexOpsMixin.value_counts of red       10
green     20
blue     100
dtype: int64>

# DataFrame
## DataFrame创建

In [11]:
pd.DataFrame(np.random.randn(2,3))

Unnamed: 0,0,1,2
0,-1.195125,-1.017966,-0.739015
1,2.705541,0.260941,-0.167092


In [12]:
score = np.random.randint(40, 100, (10, 5))

In [13]:
score

array([[92, 99, 92, 96, 80],
       [81, 80, 68, 70, 79],
       [80, 94, 95, 85, 63],
       [69, 71, 98, 52, 60],
       [72, 99, 75, 86, 89],
       [65, 44, 72, 41, 45],
       [70, 93, 73, 54, 74],
       [41, 40, 63, 45, 46],
       [56, 46, 56, 67, 71],
       [55, 67, 68, 59, 63]])

In [14]:
score_df = pd.DataFrame(score)

In [15]:
score_df

Unnamed: 0,0,1,2,3,4
0,92,99,92,96,80
1,81,80,68,70,79
2,80,94,95,85,63
3,69,71,98,52,60
4,72,99,75,86,89
5,65,44,72,41,45
6,70,93,73,54,74
7,41,40,63,45,46
8,56,46,56,67,71
9,55,67,68,59,63


In [16]:
subjects = ["语文", "数学", "英语", "政治", "体育"]

stu = ["同学"+ str(i) for i in range(score_df.shape[0])]

data = pd.DataFrame(score, columns=subjects, index=stu)

In [17]:
stu

['同学0', '同学1', '同学2', '同学3', '同学4', '同学5', '同学6', '同学7', '同学8', '同学9']

In [18]:
data

Unnamed: 0,语文,数学,英语,政治,体育
同学0,92,99,92,96,80
同学1,81,80,68,70,79
同学2,80,94,95,85,63
同学3,69,71,98,52,60
同学4,72,99,75,86,89
同学5,65,44,72,41,45
同学6,70,93,73,54,74
同学7,41,40,63,45,46
同学8,56,46,56,67,71
同学9,55,67,68,59,63


## DataFrame的属性

In [19]:
data.shape

(10, 5)

In [20]:
data.index

Index(['同学0', '同学1', '同学2', '同学3', '同学4', '同学5', '同学6', '同学7', '同学8', '同学9'], dtype='object')

In [21]:
data.columns

Index(['语文', '数学', '英语', '政治', '体育'], dtype='object')

In [22]:
data.values

array([[92, 99, 92, 96, 80],
       [81, 80, 68, 70, 79],
       [80, 94, 95, 85, 63],
       [69, 71, 98, 52, 60],
       [72, 99, 75, 86, 89],
       [65, 44, 72, 41, 45],
       [70, 93, 73, 54, 74],
       [41, 40, 63, 45, 46],
       [56, 46, 56, 67, 71],
       [55, 67, 68, 59, 63]])

In [23]:
data.T

Unnamed: 0,同学0,同学1,同学2,同学3,同学4,同学5,同学6,同学7,同学8,同学9
语文,92,81,80,69,72,65,70,41,56,55
数学,99,80,94,71,99,44,93,40,46,67
英语,92,68,95,98,75,72,73,63,56,68
政治,96,70,85,52,86,41,54,45,67,59
体育,80,79,63,60,89,45,74,46,71,63


In [24]:
data

Unnamed: 0,语文,数学,英语,政治,体育
同学0,92,99,92,96,80
同学1,81,80,68,70,79
同学2,80,94,95,85,63
同学3,69,71,98,52,60
同学4,72,99,75,86,89
同学5,65,44,72,41,45
同学6,70,93,73,54,74
同学7,41,40,63,45,46
同学8,56,46,56,67,71
同学9,55,67,68,59,63


In [25]:
data.head()#head(5)：显示前5行内容如果不补充参数，默认5行。填入参数N则显示前N行

Unnamed: 0,语文,数学,英语,政治,体育
同学0,92,99,92,96,80
同学1,81,80,68,70,79
同学2,80,94,95,85,63
同学3,69,71,98,52,60
同学4,72,99,75,86,89


In [26]:
data.head(3)

Unnamed: 0,语文,数学,英语,政治,体育
同学0,92,99,92,96,80
同学1,81,80,68,70,79
同学2,80,94,95,85,63


In [27]:
data.tail(3)#tail(5):显示后5行内容,如果不补充参数，默认5行。填入参数N则显示后N行

Unnamed: 0,语文,数学,英语,政治,体育
同学7,41,40,63,45,46
同学8,56,46,56,67,71
同学9,55,67,68,59,63


## DataFrame索引值的设置

In [28]:
stu = ["同学_"+ str(i) for i in range(score_df.shape[0])]

data.index = stu

In [29]:
# stu

In [30]:
data

Unnamed: 0,语文,数学,英语,政治,体育
同学_0,92,99,92,96,80
同学_1,81,80,68,70,79
同学_2,80,94,95,85,63
同学_3,69,71,98,52,60
同学_4,72,99,75,86,89
同学_5,65,44,72,41,45
同学_6,70,93,73,54,74
同学_7,41,40,63,45,46
同学_8,56,46,56,67,71
同学_9,55,67,68,59,63


In [31]:
# data.index[2] = "同学__"

In [32]:
data.reset_index() # reset_index(drop=False)设置新的下标索引drop: 默认为False，不删除原来索引，
                             # 如果为True,删除原来的索引值 

Unnamed: 0,index,语文,数学,英语,政治,体育
0,同学_0,92,99,92,96,80
1,同学_1,81,80,68,70,79
2,同学_2,80,94,95,85,63
3,同学_3,69,71,98,52,60
4,同学_4,72,99,75,86,89
5,同学_5,65,44,72,41,45
6,同学_6,70,93,73,54,74
7,同学_7,41,40,63,45,46
8,同学_8,56,46,56,67,71
9,同学_9,55,67,68,59,63


In [33]:
data.reset_index(drop=True)

Unnamed: 0,语文,数学,英语,政治,体育
0,92,99,92,96,80
1,81,80,68,70,79
2,80,94,95,85,63
3,69,71,98,52,60
4,72,99,75,86,89
5,65,44,72,41,45
6,70,93,73,54,74
7,41,40,63,45,46
8,56,46,56,67,71
9,55,67,68,59,63


In [34]:
df = pd.DataFrame({'month': [1, 4, 7, 10],
                    'year': [2012, 2014, 2013, 2014],
                    'sale':[55, 40, 84, 31]})

In [35]:
df

Unnamed: 0,month,year,sale
0,1,2012,55
1,4,2014,40
2,7,2013,84
3,10,2014,31


In [36]:
df.set_index("year")

Unnamed: 0_level_0,month,sale
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2012,1,55
2014,4,40
2013,7,84
2014,10,31


In [37]:
df = df.set_index(["year", "month"])

In [38]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,sale
year,month,Unnamed: 2_level_1
2012,1,55
2014,4,40
2013,7,84
2014,10,31


# MultiIndex与Panel
## MultiIndex

In [39]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,sale
year,month,Unnamed: 2_level_1
2012,1,55
2014,4,40
2013,7,84
2014,10,31


In [40]:
df.index

MultiIndex([(2012,  1),
            (2014,  4),
            (2013,  7),
            (2014, 10)],
           names=['year', 'month'])

In [41]:
df.index.names

FrozenList(['year', 'month'])

In [42]:
df.index.levels

FrozenList([[2012, 2013, 2014], [1, 4, 7, 10]])

In [43]:
arrays = [[1, 1, 2, 2], ["r", "b", "r","b"]]
pd.MultiIndex.from_arrays(arrays, names=("num", "col"))

MultiIndex([(1, 'r'),
            (1, 'b'),
            (2, 'r'),
            (2, 'b')],
           names=['num', 'col'])

## panel

In [44]:
p = pd.Panel(data=np.arange(24).reshape(4,3,2),
                 items=list('ABCD'),
                 major_axis=pd.date_range('20130101', periods=3),
                 minor_axis=['first', 'second'])

AttributeError: module 'pandas' has no attribute 'Panel'

In [None]:
p

In [None]:
p[:, :, "second"]

In [None]:
p[:, :, "first"]

In [None]:
p["A", :, :]