In [1]:
import pandas as pd
import numpy as np

# Sample data used by every cell below: one tuple per person, fields in the
# same order as the `columns` list (name, city, age).
df = pd.DataFrame(
    [
        ('ray', 'hangzhou', 10),
        ('jack', 'beijing', 30),
        ('lucy', 'hangzhou', 20),
        ('bob', 'chengdu', 15),
        ('candy', 'suzhou', 50),
    ],
    columns=['name', 'city', 'age'],
)
print(df)

    name      city  age
0    ray  hangzhou   10
1   jack   beijing   30
2   lucy  hangzhou   20
3    bob   chengdu   15
4  candy    suzhou   50


In [2]:
# Selecting rows by index label
print(df.loc[4])        # a single label returns that row as a Series
print(df.loc[[4]])      # a list of labels returns a DataFrame

name     candy
city    suzhou
age         50
Name: 4, dtype: object
    name    city  age
4  candy  suzhou   50


In [3]:
# Adding a row
# Assigning to a label that is not yet in the index (5) adds a new row to df in place;
# the list supplies one value per column, in column order.
df.loc[5] = ['baby', 'shanghai', 40]
print(df)

    name      city  age
0    ray  hangzhou   10
1   jack   beijing   30
2   lucy  hangzhou   20
3    bob   chengdu   15
4  candy    suzhou   50
5   baby  shanghai   40


In [4]:
# Appending the rows of another DataFrame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so all
# three variants use pd.concat, which builds the same frames on old and new pandas.
df1 = pd.DataFrame({'name': ['li', 'wang'], 'city': ['guangzhou', 'shenzhen'], 'age': [35, 56]})
print(pd.concat([df, df1]))                             # each frame keeps its own index labels, so 0 and 1 repeat
print('------')
print(pd.concat([df, df1], ignore_index=True))          # ignore_index=True renumbers the result from 0
print('-------')
print(pd.concat([df, df1]).reset_index(drop=True))      # same result: concatenate first, then discard the old labels

    name       city  age
0    ray   hangzhou   10
1   jack    beijing   30
2   lucy   hangzhou   20
3    bob    chengdu   15
4  candy     suzhou   50
5   baby   shanghai   40
0     li  guangzhou   35
1   wang   shenzhen   56
------
    name       city  age
0    ray   hangzhou   10
1   jack    beijing   30
2   lucy   hangzhou   20
3    bob    chengdu   15
4  candy     suzhou   50
5   baby   shanghai   40
6     li  guangzhou   35
7   wang   shenzhen   56
-------
    name       city  age
0    ray   hangzhou   10
1   jack    beijing   30
2   lucy   hangzhou   20
3    bob    chengdu   15
4  candy     suzhou   50
5   baby   shanghai   40
6     li  guangzhou   35
7   wang   shenzhen   56


In [5]:
# Deleting rows
# drop() returns a new DataFrame and leaves df untouched (pass inplace=True to
# modify df itself). index=... is the keyword form of dropping labels along axis 0,
# which is the default axis.
print(df.drop(index=5))
print('-------')
print(df.drop(index=[3, 5]))

    name      city  age
0    ray  hangzhou   10
1   jack   beijing   30
2   lucy  hangzhou   20
3    bob   chengdu   15
4  candy    suzhou   50
-------
    name      city  age
0    ray  hangzhou   10
1   jack   beijing   30
2   lucy  hangzhou   20
4  candy    suzhou   50


In [6]:
# Modifying a row
# Assigning a list to an existing label overwrites that row, one value per column.
# Several rows can be assigned at once with one inner list per row; .loc slices include
# the end label, so df.loc[0:1] addresses two rows and df.loc[0:3] addresses four.
df.loc[0] = ['demon', 'hangzhou', 33]   # e.g. df.loc[0:1] = [['demon', 'hangzhou', 33], ['jack', 'beijing', 30]]
print(df)

    name      city  age
0  demon  hangzhou   33
1   jack   beijing   30
2   lucy  hangzhou   20
3    bob   chengdu   15
4  candy    suzhou   50
5   baby  shanghai   40


In [7]:
# Adding columns
# NOTE: 'nan' and 'nv' are plain strings (presumably pinyin for male / female), not missing values.
df['sex'] = ['nan', 'nv', 'nan', 'nv', 'nan', 'nan']        # assigning to a new key appends the column at the end
# insert() raises ValueError when the column already exists, so this cell cannot be
# re-run without rebuilding df first.
df.insert(1, 'height', [170, 168, 188, 175, 190, 169])      # insert a new column at any position (here: position 1)
print(df)
# Modifying a column: df['sex'] = 'nan' sets every row of the column to that value

    name  height      city  age  sex
0  demon     170  hangzhou   33  nan
1   jack     168   beijing   30   nv
2   lucy     188  hangzhou   20  nan
3    bob     175   chengdu   15   nv
4  candy     190    suzhou   50  nan
5   baby     169  shanghai   40  nan


In [8]:
# Deleting columns
# drop() returns a new DataFrame. The result is neither assigned nor printed here, so
# df itself is unchanged and the print below still shows the 'sex' column.
# NOTE(review): presumably this is meant to show that drop() is not in-place -- confirm.
df.drop(['sex'], axis=1)
# del df['height'] would remove the column from df in place
print(df)

    name  height      city  age  sex
0  demon     170  hangzhou   33  nan
1   jack     168   beijing   30   nv
2   lucy     188  hangzhou   20  nan
3    bob     175   chengdu   15   nv
4  candy     190    suzhou   50  nan
5   baby     169  shanghai   40  nan


In [9]:
# Joining two frames on a shared column.
# `on` names the key column; `how` picks the join type: 'inner' keeps keys found in both
# frames, 'outer' keeps keys found in either, 'left' keeps every left row and adds matches
# from the right, 'right' keeps every right row and adds matches from the left.
# Both frames have a 'height' column, so the result carries height_x and height_y.
df1 = pd.DataFrame({'name': ['jack', 'lucy'], 'height': [178, 188]})
a = df.merge(df1, how='inner', on='name')
# Key columns with different names:
#   pd.merge(df_1, df_2, left_on='name_1', right_on='name_2', how='inner')
# Several key columns: on=['name', 'age']
print(a)

# Joining a column of the left frame against the index of the right frame.
left = pd.DataFrame({'key': ['a', 'c', 'b', 'a'], 'value': range(4)})
right = pd.DataFrame({'value': [10, 20]}, index=['a', 'b'])
# left_on names the left key column; right_index=True uses right's index as its key.
print(left.merge(right, how='inner', left_on='key', right_index=True))

   name  height_x      city  age  sex  height_y
0  jack       168   beijing   30   nv       178
1  lucy       188  hangzhou   20  nan       188
  key  value_x  value_y
0   a        0       10
3   a        3       10
2   b        2       20


In [10]:
# df.combine_first(df2) patches the missing values of df with the values df2 holds at the same positions
print(df)

    name  height      city  age  sex
0  demon     170  hangzhou   33  nan
1   jack     168   beijing   30   nv
2   lucy     188  hangzhou   20  nan
3    bob     175   chengdu   15   nv
4  candy     190    suzhou   50  nan
5   baby     169  shanghai   40  nan


In [11]:
# Grouping rows into categories
# The new column '员工等级' (employee level) is '新员工' (new employee) where age <= 30
# and '老员工' (veteran employee) otherwise.
df['员工等级'] = np.where(df['age'] <= 30, '新员工', '老员工')        # split into two groups
# More than two groups: assign one condition at a time. Each comparison needs its own
# parentheses because & binds tighter than <= and ==:
# df.loc[(df['age'] <= 30) & (df['sex'] == 'nv'), '性别划分'] = '新女员工'
print(df)

    name  height      city  age  sex 员工等级
0  demon     170  hangzhou   33  nan  老员工
1   jack     168   beijing   30   nv  新员工
2   lucy     188  hangzhou   20  nan  新员工
3    bob     175   chengdu   15   nv  新员工
4  candy     190    suzhou   50  nan  老员工
5   baby     169  shanghai   40  nan  老员工


In [12]:
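# Splitting one column into several (assumes the 'height_age' column built by the merging example below)
# df1 = pd.DataFrame([i.split('-') for i in df['height_age']], index=df.index, columns=['height','age'])
# Merging several columns into one
# df['height_age'] = df['height'].apply(str) + '-' + df['age'].apply(str)         apply(str) converts each value of the column to a string; DataFrame.apply works on whole rows or columns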

In [13]:
# Sorting
# Sort by the values of a column
print(df.sort_values(by='age'))     # ascending by default; pass ascending=False for descending
# print(df.sort_values(by=['age', 'name'],ascending=[True,False])) age is the primary key (ascending), name the secondary key (descending)
print('-------')
print(df.sort_index())      # sort by index labels

    name  height      city  age  sex 员工等级
3    bob     175   chengdu   15   nv  新员工
2   lucy     188  hangzhou   20  nan  新员工
1   jack     168   beijing   30   nv  新员工
0  demon     170  hangzhou   33  nan  老员工
5   baby     169  shanghai   40  nan  老员工
4  candy     190    suzhou   50  nan  老员工
-------
    name  height      city  age  sex 员工等级
0  demon     170  hangzhou   33  nan  老员工
1   jack     168   beijing   30   nv  新员工
2   lucy     188  hangzhou   20  nan  新员工
3    bob     175   chengdu   15   nv  新员工
4  candy     190    suzhou   50  nan  老员工
5   baby     169  shanghai   40  nan  老员工


In [14]:
# Ranking
# method='dense' ("Chinese-style" ranking): tied values share a rank and the next rank
# follows without a gap. rank() returns floats, hence 1.0, 2.0, ... in the output.
df['age_rank'] = df['age'].rank(method='dense')         # dense rank of age, smallest age = 1
print(df)

    name  height      city  age  sex 员工等级  age_rank
0  demon     170  hangzhou   33  nan  老员工       4.0
1   jack     168   beijing   30   nv  新员工       3.0
2   lucy     188  hangzhou   20  nan  新员工       2.0
3    bob     175   chengdu   15   nv  新员工       1.0
4  candy     190    suzhou   50  nan  老员工       6.0
5   baby     169  shanghai   40  nan  老员工       5.0


In [15]:
# df1 is the two-row name/height frame from the merge cell above.
print(df1)
print('-----')
print(df1.T)        # transpose: rows become columns and columns become rows

   name  height
0  jack     178
1  lucy     188
-----
           0     1
name    jack  lucy
height   178   188
