# pandas索引操作

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a DataFrame from a dict whose values mix scalars, a Series, a NumPy
# array and a plain list. The scalar entries ('A', 'B', 'F') are repeated on
# every row; 'C' is the only entry that carries explicit row labels.
dict_data = dict(
    A=1,
    B=pd.Timestamp('20190926'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.array([1, 2, 3, 4], dtype='int32'),
    E=["Python", "Java", "C++", "C"],
    F='wangdao',
)
df_obj2 = pd.DataFrame(data=dict_data)
# Inspect the row labels of the resulting frame.
print(df_obj2.index)

Index([0, 1, 2, 3], dtype='int64')


In [10]:
# Print the whole frame followed by a 50-character divider in one call.
print(df_obj2, 50 * '-', sep='\n')
# Positional row access: iloc[2] yields the third row as a Series whose labels
# are the frame's column names; as the last expression it becomes the cell result.
df_obj2.iloc[2]

   A          B    C  D       E        F
0  1 2019-09-26  1.0  1  Python  wangdao
1  1 2019-09-26  1.0  2    Java  wangdao
2  1 2019-09-26  1.0  3     C++  wangdao
3  1 2019-09-26  1.0  4       C  wangdao
--------------------------------------------------


A                      1
B    2019-09-26 00:00:00
C                    1.0
D                      3
E                    C++
F                wangdao
Name: 2, dtype: object

In [12]:
# A Series holding 0..4, labelled with the letters 'a' through 'e'.
ser_obj = pd.Series(data=range(5), index=['a', 'b', 'c', 'd', 'e'])
print(ser_obj)
# The label index on its own is the cell's result.
ser_obj.index

a    0
b    1
c    2
d    3
e    4
dtype: int64


Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [14]:
# Slice indexing: positional vs. label-based.
print(
    ser_obj.iloc[1:3],     # by integer position: start included, stop excluded
    ser_obj.loc['b':'d'],  # by index label: both endpoints are included
    sep='\n',
)

b    1
c    2
dtype: int64
b    1
c    2
d    3
dtype: int64


In [16]:
# Non-contiguous positional selection: a list of integer positions passed to
# iloc picks exactly those entries (here the 2nd and 4th).
print(ser_obj.iloc[[1, 3]])

b    1
d    3
dtype: int64


In [17]:
# Element-wise comparison produces a boolean mask with the same labels as ser_obj.
ser_bool = ser_obj > 2
# Show the data and its mask one after the other.
print(ser_obj, ser_bool, sep='\n')


a    0
b    1
c    2
d    3
e    4
dtype: int64
a    False
b    False
c    False
d     True
e     True
dtype: bool


In [18]:
# Boolean indexing: keep only the entries whose value in the ser_bool mask is True.
print(ser_obj[ser_bool])

d    3
e    4
dtype: int64


In [21]:
# 5x4 frame of standard-normal samples with columns 'A'..'D' and the default
# integer row labels. No seed is set, so the values differ on every run.
df_obj = pd.DataFrame(data=np.random.randn(5, 4), columns=list('ABCD'))
print(df_obj)

          A         B         C         D
0  1.135076  0.891088 -0.150878  0.830937
1 -1.209320 -0.924141 -0.791280  0.101579
2  2.120594 -1.094498  1.636591 -0.589254
3 -1.817295 -2.068245 -1.037455 -0.541862
4  0.970406 -0.079924 -0.557270 -0.411909


In [24]:
# Compare what the two column-selection spellings return.
print(
    type(df_obj['A']),    # a single label
    50 * '-',
    type(df_obj[['A']]),  # a list of labels
    sep='\n',
)

<class 'pandas.core.series.Series'>
--------------------------------------------------
<class 'pandas.core.frame.DataFrame'>


In [25]:
# Selecting with a single label returns a Series; only one column can be
# picked this way.
# NOTE(review): the original note said edits to this Series propagate to
# df_obj. That depends on the pandas version / Copy-on-Write mode, so do not
# rely on it; assign through df_obj.loc[...] to modify the frame.
print(df_obj['A'])
print('-' * 50)
# Selecting with a list of labels returns a DataFrame; the list may name
# several columns.
# NOTE(review): the original note said this makes a copy that is independent
# of df_obj and is slower than the single-label form; neither claim is
# measured or demonstrated in this notebook. Confirm before relying on it.
print(df_obj[['A']])

0    1.135076
1   -1.209320
2    2.120594
3   -1.817295
4    0.970406
Name: A, dtype: float64
--------------------------------------------------
          A
0  1.135076
1 -1.209320
2  2.120594
3 -1.817295
4  0.970406


In [26]:
# DataFrame with explicit letter labels on both axes:
# rows 'a'..'e', columns 'a'..'d'. Values are unseeded standard-normal samples.
df_obj = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=['a', 'b', 'c', 'd', 'e'],
    columns=['a', 'b', 'c', 'd'],
)
print(df_obj)

          a         b         c         d
a -0.319810  0.149251  0.823969 -0.887915
b -0.252556  0.334385 -0.219054  0.300187
c -1.308214  1.668785  0.429143  1.150986
d -0.432707 -0.233091  1.015494 -1.259830
e -0.907905 -1.114496 -1.041216 -0.286300


In [29]:
# Label-based selection with loc.
print(
    df_obj.loc['b':'d'],                  # row-label slice, both endpoints included
    50 * '-',
    df_obj.loc[['b', 'd'], ['a', 'c']],   # explicit row labels x explicit column labels
    sep='\n',
)

          a         b         c         d
b -0.252556  0.334385 -0.219054  0.300187
c -1.308214  1.668785  0.429143  1.150986
d -0.432707 -0.233091  1.015494 -1.259830
--------------------------------------------------
          a         c
b -0.252556 -0.219054
d -0.432707  1.015494


In [30]:
# Two Series of different lengths (15 and 5 entries), both with default integer labels.
s1 = pd.Series(data=range(10, 25))
s2 = pd.Series(data=range(5, 10))
# Addition aligns on labels: labels 5..14 exist only in s1, so those results
# are NaN and the whole result is promoted to float.
print(s1 + s2)

0     15.0
1     17.0
2     19.0
3     21.0
4     23.0
5      NaN
6      NaN
7      NaN
8      NaN
9      NaN
10     NaN
11     NaN
12     NaN
13     NaN
14     NaN
dtype: float64


In [35]:
# Method form of `+` with a fill value: a label present in only one operand
# is treated as 0 on the missing side, so the result has no NaN for labels 5..14.
s2.add(s1, fill_value=0)

0     15.0
1     17.0
2     19.0
3     21.0
4     23.0
5     15.0
6     16.0
7     17.0
8     18.0
9     19.0
10    20.0
11    21.0
12    22.0
13    23.0
14    24.0
dtype: float64

In [36]:
# Two all-ones frames of different shapes: 2x2 (columns a, b) and 3x3 (columns a, b, c).
df1 = pd.DataFrame(data=np.ones((2, 2)), columns=list('ab'))
df2 = pd.DataFrame(data=np.ones((3, 3)), columns=list('abc'))
# Show both inputs, a divider, and the column dtypes of the larger frame.
print(df1, df2, 50 * '-', df2.dtypes, sep='\n')
# Plain subtraction aligns on row and column labels; any cell missing from
# either frame comes out as NaN.
print(df1 - df2)
# With fill_value, a cell that exists in only one frame is computed against
# the fill value (here 2) instead of becoming NaN.
print(df2.sub(df1, fill_value=2))

     a    b
0  1.0  1.0
1  1.0  1.0
     a    b    c
0  1.0  1.0  1.0
1  1.0  1.0  1.0
2  1.0  1.0  1.0
--------------------------------------------------
a    float64
b    float64
c    float64
dtype: object
     a    b   c
0  0.0  0.0 NaN
1  0.0  0.0 NaN
2  NaN  NaN NaN
     a    b    c
0  0.0  0.0 -1.0
1  0.0  0.0 -1.0
2 -1.0 -1.0 -1.0
