In [1]:
import pandas as pd
import numpy as np

In [2]:
# Demo frame built column by column: a list of ints, a list of one-character
# strings, a scalar (repeated on every row) and a Series.
data = pd.DataFrame(dict(
    a=[1, 2, 3, 4],
    b=['a', 'b', 'c', 'd'],
    c=9,
    d=pd.Series([5, 6, 7, 8]),
))

In [3]:
data  # 'a', 'b', 'c', 'd' are the column labels

Unnamed: 0,a,b,c,d
0,1,a,9,5
1,2,b,9,6
2,3,c,9,7
3,4,d,9,8


In [5]:
data.columns # column labels

Index(['a', 'b', 'c', 'd'], dtype='object')

In [6]:
data.index # row labels

RangeIndex(start=0, stop=4, step=1)

In [7]:
# Row labels for the demo frame.
xuhao = 'one two three four'.split()
# 4x3 frame of standard-normal draws with labelled rows and columns.
# NOTE: the generator is not seeded, so the values differ on every run.
df = pd.DataFrame(
    np.random.standard_normal((4, 3)),
    index=xuhao,
    columns=list('abc'),
)

In [8]:
df

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1.177698,-0.182066
four,-1.125733,-0.765275,1.49272


In [9]:
df.index

Index(['one', 'two', 'three', 'four'], dtype='object')

In [10]:
df.columns

Index(['a', 'b', 'c'], dtype='object')

In [11]:
df

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1.177698,-0.182066
four,-1.125733,-0.765275,1.49272


In [12]:
df['a']

one     -0.327567
two      0.577296
three    0.297807
four    -1.125733
Name: a, dtype: float64

In [13]:
df.b

one     -0.190464
two      0.846016
three    1.177698
four    -0.765275
Name: b, dtype: float64

In [14]:
df[['a', 'c']]  # a list of labels selects several columns

Unnamed: 0,a,c
one,-0.327567,0.489604
two,0.577296,-1.272735
three,0.297807,-0.182066
four,-1.125733,1.49272


In [15]:
type(df.b)

pandas.core.series.Series

In [16]:
type(df[['a', 'c']])

pandas.core.frame.DataFrame

In [17]:
df.head(3)

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1.177698,-0.182066


In [18]:
df.tail(2)

Unnamed: 0,a,b,c
three,0.297807,1.177698,-0.182066
four,-1.125733,-0.765275,1.49272


In [19]:
df.values  # returns the data as a NumPy array

array([[-0.3275668 , -0.19046443,  0.48960352],
       [ 0.57729616,  0.8460159 , -1.27273538],
       [ 0.29780732,  1.17769787, -0.18206624],
       [-1.1257328 , -0.76527496,  1.49272042]])

# loc按索引取值

In [20]:
df.loc['one']

a   -0.327567
b   -0.190464
c    0.489604
Name: one, dtype: float64

In [22]:
df.loc[['one','four']]  # to select several row labels, pass them as a list

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
four,-1.125733,-0.765275,1.49272


In [24]:
df.loc['one':'three']  # label slices include the end label, so row 'three' is returned

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1.177698,-0.182066


In [25]:
df.loc['one':'three',['a','b']]

Unnamed: 0,a,b
one,-0.327567,-0.190464
two,0.577296,0.846016
three,0.297807,1.177698


In [26]:
df.a

one     -0.327567
two      0.577296
three    0.297807
four    -1.125733
Name: a, dtype: float64

In [28]:
df.loc['one':'three','a':'c']

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1.177698,-0.182066


In [30]:
df.loc[['one','four'], 'a':'c']

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
four,-1.125733,-0.765275,1.49272


In [31]:
df.loc['two','b']  # row label first, then column label

0.8460159027597443

In [32]:
df

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1.177698,-0.182066
four,-1.125733,-0.765275,1.49272


In [33]:
df.loc[:'two','b':]

Unnamed: 0,b,c
one,-0.190464,0.489604
two,0.846016,-1.272735


# iloc

In [34]:
df.iloc[1]  # select by integer position

a    0.577296
b    0.846016
c   -1.272735
Name: two, dtype: float64

In [36]:
df.iloc[1:]

Unnamed: 0,a,b,c
two,0.577296,0.846016,-1.272735
three,0.297807,1.177698,-0.182066
four,-1.125733,-0.765275,1.49272


In [37]:
df.iloc[1:, 0]  # positional: rows from position 1 onward, column 0 (a slice's stop position, when given, is excluded)

two      0.577296
three    0.297807
four    -1.125733
Name: a, dtype: float64

In [39]:
df.iloc[:2, 1:]

Unnamed: 0,b,c
one,-0.190464,0.489604
two,0.846016,-1.272735


In [41]:
df.iloc[:2, [0,2]]

Unnamed: 0,a,c
one,-0.327567,0.489604
two,0.577296,-1.272735


In [42]:
df.iloc[2,1]

1.1776978729086762

In [43]:
df.iloc[2,1]=1000  # overwrite the single value at row position 2, column position 1

In [44]:
df

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1000.0,-0.182066
four,-1.125733,-0.765275,1.49272


# 添加和删除行和列

In [45]:
df

Unnamed: 0,a,b,c
one,-0.327567,-0.190464,0.489604
two,0.577296,0.846016,-1.272735
three,0.297807,1000.0,-0.182066
four,-1.125733,-0.765275,1.49272


In [46]:
df>0

Unnamed: 0,a,b,c
one,False,False,True
two,True,True,False
three,True,True,False
four,False,False,True


In [47]:
df[df>0]

Unnamed: 0,a,b,c
one,,,0.489604
two,0.577296,0.846016,
three,0.297807,1000.0,
four,,,1.49272


In [49]:
df[df<0] = 0  # boolean-mask assignment: every negative entry of df is replaced by 0

In [50]:
df

Unnamed: 0,a,b,c
one,0.0,0.0,0.489604
two,0.577296,0.846016,0.0
three,0.297807,1000.0,0.0
four,0.0,0.0,1.49272


In [52]:
df[df.b > 1]

Unnamed: 0,a,b,c
three,0.297807,1000.0,0.0


In [53]:
df

Unnamed: 0,a,b,c
one,0.0,0.0,0.489604
two,0.577296,0.846016,0.0
three,0.297807,1000.0,0.0
four,0.0,0.0,1.49272


In [55]:
df['d'] = 4  # add a column; the scalar is repeated on every row

In [56]:
df['e'] = np.arange(4)

In [58]:
df

Unnamed: 0,a,b,c,d,e
one,0.0,0.0,0.489604,4,0
two,0.577296,0.846016,0.0,4,1
three,0.297807,1000.0,0.0,4,2
four,0.0,0.0,1.49272,4,3


In [59]:
# Assigning a Series aligns on the row index: 'two' has no entry and becomes
# NaN, while the Series label 'five' is absent from df and is not added.
df['f'] = pd.Series([2,3,4,5], index=['one', 'three', 'four', 'five'])

In [60]:
df

Unnamed: 0,a,b,c,d,e,f
one,0.0,0.0,0.489604,4,0,2.0
two,0.577296,0.846016,0.0,4,1,
three,0.297807,1000.0,0.0,4,2,3.0
four,0.0,0.0,1.49272,4,3,4.0


In [62]:
del df['e']  # delete a column

In [63]:
df

Unnamed: 0,a,b,c,d,f
one,0.0,0.0,0.489604,4,2.0
two,0.577296,0.846016,0.0,4,
three,0.297807,1000.0,0.0,4,3.0
four,0.0,0.0,1.49272,4,4.0


In [64]:
# Attribute assignment does not create a column (the frame displayed in the
# next cell is unchanged); use bracket syntax with the column name: df['h'] = 5.
df.h = 5

In [65]:
df

Unnamed: 0,a,b,c,d,f
one,0.0,0.0,0.489604,4,2.0
two,0.577296,0.846016,0.0,4,
three,0.297807,1000.0,0.0,4,3.0
four,0.0,0.0,1.49272,4,4.0


In [66]:
df.loc['five'] = 10  # add a row; the scalar is repeated in every column

In [67]:
df

Unnamed: 0,a,b,c,d,f
one,0.0,0.0,0.489604,4,2.0
two,0.577296,0.846016,0.0,4,
three,0.297807,1000.0,0.0,4,3.0
four,0.0,0.0,1.49272,4,4.0
five,10.0,10.0,10.0,10,10.0


In [68]:
df.drop('one')  # drop a row; the result is returned (a later cell assigns it back to df)

Unnamed: 0,a,b,c,d,f
two,0.577296,0.846016,0.0,4,
three,0.297807,1000.0,0.0,4,3.0
four,0.0,0.0,1.49272,4,4.0
five,10.0,10.0,10.0,10,10.0


In [69]:
df = df.drop('one')

In [70]:
df

Unnamed: 0,a,b,c,d,f
two,0.577296,0.846016,0.0,4,
three,0.297807,1000.0,0.0,4,3.0
four,0.0,0.0,1.49272,4,4.0
five,10.0,10.0,10.0,10,10.0


In [73]:
# Drop rows 'four' and 'five'. errors='ignore' keeps the cell re-runnable:
# df is rebound to the result, so on a second run the labels are already gone
# and a plain drop() raises KeyError ("not found in axis").
df = df.drop(['four', 'five'], errors='ignore')

In [74]:
df[['a','b']]

Unnamed: 0,a,b
two,0.577296,0.846016
three,0.297807,1000.0


In [77]:
df.drop(columns=['c','f'])  # drop columns; returns a new frame, df itself still has 'c' and 'f'

Unnamed: 0,a,b,d
two,0.577296,0.846016,4
three,0.297807,1000.0,4


In [78]:
df

Unnamed: 0,a,b,c,d,f
two,0.577296,0.846016,0.0,4,
three,0.297807,1000.0,0.0,4,3.0


# 排序与数据对齐

In [80]:
num = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
data = pd.DataFrame(np.random.randn(7,4), index=num, columns=list('abcd'))

In [81]:
data

Unnamed: 0,a,b,c,d
one,0.695007,0.712372,0.739108,-0.932024
two,-1.52455,0.94524,1.424037,-0.262905
three,0.110267,0.872907,-1.592445,-0.063497
four,-1.507138,0.020152,-0.14589,0.67317
five,-0.398889,1.113829,0.5039,0.288276
six,-0.852355,0.654533,1.220746,2.075212
seven,-0.551279,1.460979,-1.77064,-0.379884


In [82]:
data.shape

(7, 4)

In [83]:
np.shape(data)

(7, 4)

In [84]:
data.T

Unnamed: 0,one,two,three,four,five,six,seven
a,0.695007,-1.52455,0.110267,-1.507138,-0.398889,-0.852355,-0.551279
b,0.712372,0.94524,0.872907,0.020152,1.113829,0.654533,1.460979
c,0.739108,1.424037,-1.592445,-0.14589,0.5039,1.220746,-1.77064
d,-0.932024,-0.262905,-0.063497,0.67317,0.288276,2.075212,-0.379884


In [85]:
data.T.shape

(4, 7)

In [86]:
data

Unnamed: 0,a,b,c,d
one,0.695007,0.712372,0.739108,-0.932024
two,-1.52455,0.94524,1.424037,-0.262905
three,0.110267,0.872907,-1.592445,-0.063497
four,-1.507138,0.020152,-0.14589,0.67317
five,-0.398889,1.113829,0.5039,0.288276
six,-0.852355,0.654533,1.220746,2.075212
seven,-0.551279,1.460979,-1.77064,-0.379884


In [87]:
data.sort_index(axis=1, ascending=False)  # axis=1 sorts the column labels; ascending by default, ascending=False sorts descending

Unnamed: 0,d,c,b,a
one,-0.932024,0.739108,0.712372,0.695007
two,-0.262905,1.424037,0.94524,-1.52455
three,-0.063497,-1.592445,0.872907,0.110267
four,0.67317,-0.14589,0.020152,-1.507138
five,0.288276,0.5039,1.113829,-0.398889
six,2.075212,1.220746,0.654533,-0.852355
seven,-0.379884,-1.77064,1.460979,-0.551279


In [88]:
data

Unnamed: 0,a,b,c,d
one,0.695007,0.712372,0.739108,-0.932024
two,-1.52455,0.94524,1.424037,-0.262905
three,0.110267,0.872907,-1.592445,-0.063497
four,-1.507138,0.020152,-0.14589,0.67317
five,-0.398889,1.113829,0.5039,0.288276
six,-0.852355,0.654533,1.220746,2.075212
seven,-0.551279,1.460979,-1.77064,-0.379884


In [89]:
# Sort the columns in descending label order. The result is assigned back to
# the name rather than using inplace=True, so the rebinding is explicit.
data = data.sort_index(axis=1, ascending=False)

In [90]:
data.sort_index(axis=0, ascending=False)

Unnamed: 0,d,c,b,a
two,-0.262905,1.424037,0.94524,-1.52455
three,-0.063497,-1.592445,0.872907,0.110267
six,2.075212,1.220746,0.654533,-0.852355
seven,-0.379884,-1.77064,1.460979,-0.551279
one,-0.932024,0.739108,0.712372,0.695007
four,0.67317,-0.14589,0.020152,-1.507138
five,0.288276,0.5039,1.113829,-0.398889


In [91]:
data

Unnamed: 0,d,c,b,a
one,-0.932024,0.739108,0.712372,0.695007
two,-0.262905,1.424037,0.94524,-1.52455
three,-0.063497,-1.592445,0.872907,0.110267
four,0.67317,-0.14589,0.020152,-1.507138
five,0.288276,0.5039,1.113829,-0.398889
six,2.075212,1.220746,0.654533,-0.852355
seven,-0.379884,-1.77064,1.460979,-0.551279


In [92]:
data.sort_values(by='c')

Unnamed: 0,d,c,b,a
seven,-0.379884,-1.77064,1.460979,-0.551279
three,-0.063497,-1.592445,0.872907,0.110267
four,0.67317,-0.14589,0.020152,-1.507138
five,0.288276,0.5039,1.113829,-0.398889
one,-0.932024,0.739108,0.712372,0.695007
six,2.075212,1.220746,0.654533,-0.852355
two,-0.262905,1.424037,0.94524,-1.52455


In [93]:
data

Unnamed: 0,d,c,b,a
one,-0.932024,0.739108,0.712372,0.695007
two,-0.262905,1.424037,0.94524,-1.52455
three,-0.063497,-1.592445,0.872907,0.110267
four,0.67317,-0.14589,0.020152,-1.507138
five,0.288276,0.5039,1.113829,-0.398889
six,2.075212,1.220746,0.654533,-0.852355
seven,-0.379884,-1.77064,1.460979,-0.551279


## 索引重新排序

In [95]:
data.reindex(columns=['b','c','a','e','d'])

Unnamed: 0,b,c,a,e,d
one,0.712372,0.739108,0.695007,,-0.932024
two,0.94524,1.424037,-1.52455,,-0.262905
three,0.872907,-1.592445,0.110267,,-0.063497
four,0.020152,-0.14589,-1.507138,,0.67317
five,1.113829,0.5039,-0.398889,,0.288276
six,0.654533,1.220746,-0.852355,,2.075212
seven,1.460979,-1.77064,-0.551279,,-0.379884


In [96]:
data.reindex(columns=['b','c','e','d'])

Unnamed: 0,b,c,e,d
one,0.712372,0.739108,,-0.932024
two,0.94524,1.424037,,-0.262905
three,0.872907,-1.592445,,-0.063497
four,0.020152,-0.14589,,0.67317
five,1.113829,0.5039,,0.288276
six,0.654533,1.220746,,2.075212
seven,1.460979,-1.77064,,-0.379884


## 同时行索引和列索引对齐

In [99]:
data1 = data.reindex(columns=['b','c','e','d'], index=['one', 'two', 'three', 'four', 'six', 'seven'])

In [100]:
data1

Unnamed: 0,b,c,e,d
one,0.712372,0.739108,,-0.932024
two,0.94524,1.424037,,-0.262905
three,0.872907,-1.592445,,-0.063497
four,0.020152,-0.14589,,0.67317
six,0.654533,1.220746,,2.075212
seven,1.460979,-1.77064,,-0.379884


In [101]:
data

Unnamed: 0,d,c,b,a
one,-0.932024,0.739108,0.712372,0.695007
two,-0.262905,1.424037,0.94524,-1.52455
three,-0.063497,-1.592445,0.872907,0.110267
four,0.67317,-0.14589,0.020152,-1.507138
five,0.288276,0.5039,1.113829,-0.398889
six,2.075212,1.220746,0.654533,-0.852355
seven,-0.379884,-1.77064,1.460979,-0.551279


In [103]:
# Arithmetic aligns on both row and column labels; a label that is missing
# from either operand (columns 'a' and 'e', row 'five') yields NaN.
data2 = data + data1

In [104]:
data2

Unnamed: 0,a,b,c,d,e
five,,,,,
four,,0.040303,-0.291781,1.346339,
one,,1.424744,1.478216,-1.864048,
seven,,2.921959,-3.541281,-0.759769,
six,,1.309065,2.441493,4.150424,
three,,1.745814,-3.184889,-0.126993,
two,,1.89048,2.848073,-0.52581,


In [105]:
np.mean(data2, axis=1)

five          NaN
four     0.364954
one      0.346304
seven   -0.459697
six      2.633661
three   -0.522023
two      1.404248
dtype: float64

In [106]:
data2.mean(1)

five          NaN
four     0.364954
one      0.346304
seven   -0.459697
six      2.633661
three   -0.522023
two      1.404248
dtype: float64

In [107]:
np.sum(data2, axis=1)  # row sums skip NaN, so the all-NaN row 'five' gives 0.0 (its mean above is NaN)

five     0.000000
four     1.094862
one      1.038912
seven   -1.379091
six      7.900982
three   -1.566068
two      4.212743
dtype: float64

In [108]:
data2.sum(1)

five     0.000000
four     1.094862
one      1.038912
seven   -1.379091
six      7.900982
three   -1.566068
two      4.212743
dtype: float64

In [109]:
data2.max(1)

five          NaN
four     1.346339
one      1.478216
seven    2.921959
six      4.150424
three    1.745814
two      2.848073
dtype: float64