# pandas 基础

In [1]:
import pandas as pd
import numpy as np

## dataframe 结构

In [2]:
# Limit how much of a DataFrame is rendered: at most 8 columns and 10 rows.
pd.set_option('display.max_columns', 8)
pd.set_option('display.max_rows', 10)

In [3]:
# Read the CSV file with read_csv(); the file is decoded as GB2312.
# head() shows the first five rows; head(n) shows the first n rows.
df_report = pd.read_csv('data.csv', encoding='gb2312')
df_report.head()

Unnamed: 0,里程,测点位置,设计宽度（mm）左,实测宽度（mm）左,...,设计宽度（mm）右,实测宽度（mm）右,差值（mm）右,director_name
0,63316.4,A1,-3082,-3101.0,...,3082,3116,34.19,文件名称1
1,63316.4,A2,-4000,-4003.0,...,4000,4035,35.03,文件名称2
2,63316.4,A3,-3082,-3043.0,...,3082,3092,10.5,文件名称3
3,63318.2,A1,-3082,-3105.0,...,3082,3120,38.11,文件名称4
4,63318.2,A2,-4000,-3998.0,...,4000,4039,38.46,文件名称5


## 访问dataframe组件

In [4]:
# Column labels
df_columns = df_report.columns
# Row labels
df_index = df_report.index
# Underlying data as a NumPy array
df_data = df_report.values

In [5]:
df_columns

Index(['里程', '测点位置', '设计宽度（mm）左', '实测宽度（mm）左', '差值（mm）左', '设计宽度（mm）右',
       '实测宽度（mm）右', '差值（mm）右', 'director_name'],
      dtype='object')

In [6]:
df_index

RangeIndex(start=0, stop=21, step=1)

In [7]:
df_data

array([[63316.4, 'A1', -3082, -3101.0, -18.7, 3082, 3116, 34.19, '文件名称1'],
       [63316.4, 'A2', -4000, -4003.0, -2.6, 4000, 4035, 35.03, '文件名称2'],
       [63316.4, 'A3', -3082, -3043.0, 38.9, 3082, 3092, 10.5, '文件名称3'],
       [63318.2, 'A1', -3082, -3105.0, -23.2, 3082, 3120, 38.11, '文件名称4'],
       [63318.2, 'A2', -4000, -3998.0, 2.3, 4000, 4039, 38.46, '文件名称5'],
       [63318.2, 'A3', -3082, -3047.0, 34.7, 3082, 3066, -15.72, '文件名称6'],
       [63320.0, 'A1', -3082, -3101.0, -19.0, 3082, 3120, 38.09, '文件名称7'],
       [63320.0, 'A2', -4000, -3993.0, 6.8, 4000, 4029, 29.21, '文件名称8'],
       [63320.0, 'A3', -3082, -3043.0, 38.7, 3082, 3076, -6.12, '文件名称9'],
       [63321.8, 'A1', -3082, -3096.0, -14.3, 3082, 3114, 32.0, '文件名称10'],
       [63321.8, 'A2', -4000, -3991.0, 9.2, 4000, 4035, 35.28, '文件名称11'],
       [63321.8, 'A3', -3082, nan, 37.4, 3082, 3065, -16.58, '文件名称12'],
       [63323.6, 'A1', -3082, -3096.0, -14.2, 3082, 3114, 31.8, '文件名称13'],
       [63323.6, 'A2', -4000, -3996.0

In [8]:
# index的类型
type(df_index)

pandas.core.indexes.range.RangeIndex

In [9]:
# columns的类型
type(df_columns)

pandas.core.indexes.base.Index

In [10]:
# data的类型
type(df_data)

numpy.ndarray

In [11]:
# 判断是不是子类型
issubclass(pd.RangeIndex, pd.Index)  # 判断 pd.RangeIndex 是否为 pd.Index 的子类

True

In [12]:
# 访问index的值
df_index.values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20])

In [13]:
# .values is a NumPy ndarray (not a Python list); it supports indexing and slicing
df_index.values[0]

np.int64(0)

In [14]:
# 访问columns的值
df_columns.values

array(['里程', '测点位置', '设计宽度（mm）左', '实测宽度（mm）左', '差值（mm）左', '设计宽度（mm）右',
       '实测宽度（mm）右', '差值（mm）右', 'director_name'], dtype=object)

## 理解数据类型

In [15]:
#各列的类型
df_report.dtypes

里程               float64
测点位置              object
设计宽度（mm）左          int64
实测宽度（mm）左        float64
差值（mm）左          float64
设计宽度（mm）右          int64
实测宽度（mm）右          int64
差值（mm）右          float64
director_name     object
dtype: object

In [16]:
# 显示各类型的数量
df_report.dtypes.value_counts()

float64    4
int64      3
object     2
Name: count, dtype: int64

## 选择一列数据作为series

In [17]:
# 选择 实测宽度（mm）左 这列
df_report['实测宽度（mm）左']

0    -3101.0
1    -4003.0
2    -3043.0
3    -3105.0
4    -3998.0
       ...  
16   -4000.0
17   -3053.0
18   -3100.0
19   -3995.0
20   -3051.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [18]:
# A column can also be selected with attribute access.
# This only works when the column name is a valid Python identifier that does not
# clash with an existing DataFrame attribute or method; otherwise use df['name'].
df_report.director_name  # pandas looks up director_name as an attribute of the DataFrame and resolves it to the column of that name

0      文件名称1
1      文件名称2
2      文件名称3
3      文件名称4
4      文件名称5
       ...  
16    文件名称17
17    文件名称18
18    文件名称19
19    文件名称20
20    文件名称21
Name: director_name, Length: 21, dtype: object

In [19]:
# 查看类型
type(df_report['director_name'])

pandas.core.series.Series

In [20]:
director = df_report['director_name']
# 查看选取的列的名字
director.name

'director_name'

In [21]:
# 单列 series 转化为 dataframe
director.to_frame().head()

Unnamed: 0,director_name
0,文件名称1
1,文件名称2
2,文件名称3
3,文件名称4
4,文件名称5


## 调用 series 方法


In [22]:
# Collect every attribute and method name exposed by the Series class into a set.
s_attr_methods = {attr_name for attr_name in dir(pd.Series)}
s_attr_methods

{'T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__bool__',
 '__class__',
 '__column_consortium_standard__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pandas_priority__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__

In [23]:
len(s_attr_methods)

419

In [24]:
# Same for DataFrame: gather its unique attribute/method names and count them.
df_attr_methods = {attr_name for attr_name in dir(pd.DataFrame)}
len(df_attr_methods)

437

In [25]:
# How many attribute/method names do Series and DataFrame have in common?
len(s_attr_methods.intersection(df_attr_methods))

362

In [26]:
# 选择df_report中任意两列
director = df_report['director_name']
actual_left = df_report['实测宽度（mm）左']

In [27]:
actual_left.head()

0   -3101.0
1   -4003.0
2   -3043.0
3   -3105.0
4   -3998.0
Name: 实测宽度（mm）左, dtype: float64

In [28]:
director.head()

0    文件名称1
1    文件名称2
2    文件名称3
3    文件名称4
4    文件名称5
Name: director_name, dtype: object

In [29]:
# Count how often each value occurs.
# NOTE: set_option changes display.max_rows globally, so every later output in the
# notebook is truncated to 8 rows as well.
pd.set_option('display.max_rows', 8)
actual_left.value_counts()

实测宽度（mm）左
-3101.0    3
-3043.0    2
-3047.0    2
-3096.0    2
          ..
-3053.0    1
-3100.0    1
-3995.0    1
-3051.0    1
Name: count, Length: 15, dtype: int64

In [30]:
actual_left.size  # 返回对象中元素的总数

21

In [31]:
actual_left.shape

(21,)

In [32]:
len(actual_left)

21

In [33]:
# actual_left有多少非空值
actual_left.count()

np.int64(20)

In [34]:
# quantile() with no argument uses q=0.5, so this is the median of actual_left
actual_left.quantile()

np.float64(-3101.0)

In [35]:
# Minimum value (the following cells show the maximum, mean, median, standard deviation and sum)
actual_left.min()

np.float64(-4003.0)

In [36]:
actual_left.max()

np.float64(-3043.0)

In [37]:
actual_left.mean()

np.float64(-3398.0)

In [38]:
actual_left.median()

np.float64(-3101.0)

In [39]:
actual_left.std()

np.float64(451.17915685340273)

In [40]:
actual_left.sum()

np.float64(-67960.0)

In [41]:
# 显示描述信息
actual_left.describe()

count      20.000000
mean    -3398.000000
std       451.179157
min     -4003.000000
25%     -3993.500000
50%     -3101.000000
75%     -3052.500000
max     -3043.000000
Name: 实测宽度（mm）左, dtype: float64

In [42]:
# 分位数是将数据按从小到大排序后，划分成若干等份的数值点
actual_left.quantile(.2)  # 计算第 20 百分位数

np.float64(-3995.2)

In [43]:
# 各个十分之一分位数
actual_left.quantile([.1, .2, .3,.4, .5, .6, .7, .8, .9])

0.1   -3998.2
0.2   -3995.2
0.3   -3991.6
0.4   -3102.6
        ...  
0.6   -3098.4
0.7   -3083.1
0.8   -3050.2
0.9   -3046.6
Name: 实测宽度（mm）左, Length: 9, dtype: float64

In [44]:
# Flag missing values: isnull() is True where the entry is NaN and False otherwise
actual_left.isnull()

0     False
1     False
2     False
3     False
      ...  
17    False
18    False
19    False
20    False
Name: 实测宽度（mm）左, Length: 21, dtype: bool

In [45]:
# 填充缺失值
actual_left_filled = actual_left.fillna(0)
actual_left_filled.count()

np.int64(21)

In [46]:
# 删除缺失值
actual_left_dropped = actual_left.dropna()
actual_left_dropped.size

20

In [47]:
actual_left_dropped

0    -3101.0
1    -4003.0
2    -3043.0
3    -3105.0
       ...  
17   -3053.0
18   -3100.0
19   -3995.0
20   -3051.0
Name: 实测宽度（mm）左, Length: 20, dtype: float64

In [48]:
# value_counts(normalize=True) 可返回频率
actual_left.value_counts(normalize=True)

实测宽度（mm）左
-3101.0    0.15
-3043.0    0.10
-3047.0    0.10
-3096.0    0.10
           ... 
-3053.0    0.05
-3100.0    0.05
-3995.0    0.05
-3051.0    0.05
Name: proportion, Length: 15, dtype: float64

In [49]:
# 判断是否有缺失值
actual_left.hasnans

True

In [50]:
# 判断是否是非缺失值
actual_left.notnull()


0     True
1     True
2     True
3     True
      ... 
17    True
18    True
19    True
20    True
Name: 实测宽度（mm）左, Length: 21, dtype: bool

## 在series上使用运算符

In [51]:
actual_left = df_report['实测宽度（mm）左']
actual_left

0    -3101.0
1    -4003.0
2    -3043.0
3    -3105.0
       ...  
17   -3053.0
18   -3100.0
19   -3995.0
20   -3051.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [52]:
# Add 1 to every element of the Series (the result is a new Series; actual_left is unchanged)
actual_left + 1

0    -3100.0
1    -4002.0
2    -3042.0
3    -3104.0
       ...  
17   -3052.0
18   -3099.0
19   -3994.0
20   -3050.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [53]:
actual_left.add(1)

0    -3100.0
1    -4002.0
2    -3042.0
3    -3104.0
       ...  
17   -3052.0
18   -3099.0
19   -3994.0
20   -3050.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [54]:
actual_left

0    -3101.0
1    -4003.0
2    -3043.0
3    -3105.0
       ...  
17   -3053.0
18   -3100.0
19   -3995.0
20   -3051.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [55]:
# Multiply every element of the Series by 2.5
actual_left * 2.5

0     -7752.5
1    -10007.5
2     -7607.5
3     -7762.5
       ...   
17    -7632.5
18    -7750.0
19    -9987.5
20    -7627.5
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [56]:
actual_left.mul(2.5)

0     -7752.5
1    -10007.5
2     -7607.5
3     -7762.5
       ...   
17    -7632.5
18    -7750.0
19    -9987.5
20    -7627.5
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [57]:
# Floor division by 7: the quotient is rounded toward negative infinity
# (not truncated toward zero), e.g. -4003 // 7 gives -572, not -571
actual_left // 7

0    -443.0
1    -572.0
2    -435.0
3    -444.0
      ...  
17   -437.0
18   -443.0
19   -571.0
20   -436.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [58]:
actual_left.floordiv(7)

0    -443.0
1    -572.0
2    -435.0
3    -444.0
      ...  
17   -437.0
18   -443.0
19   -571.0
20   -436.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [59]:
# Remainder after division by 7; it pairs with floor division, so for these
# negative values the remainder is still non-negative (e.g. -4003 % 7 gives 1)
actual_left % 7

0     0.0
1     1.0
2     2.0
3     3.0
     ... 
17    6.0
18    1.0
19    2.0
20    1.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [60]:
actual_left.mod(7)

0     0.0
1     1.0
2     2.0
3     3.0
     ... 
17    6.0
18    1.0
19    2.0
20    1.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [61]:
# 判断是否大于7
actual_left > 7

0     False
1     False
2     False
3     False
      ...  
17    False
18    False
19    False
20    False
Name: 实测宽度（mm）左, Length: 21, dtype: bool

In [62]:
actual_left.gt(7)

0     False
1     False
2     False
3     False
      ...  
17    False
18    False
19    False
20    False
Name: 实测宽度（mm）左, Length: 21, dtype: bool

In [63]:
# 判断是否等于字符串
director = df_report['director_name']
director == '文件名称4'

0     False
1     False
2     False
3      True
      ...  
17    False
18    False
19    False
20    False
Name: director_name, Length: 21, dtype: bool

In [64]:
director.eq('文件名称4')

0     False
1     False
2     False
3      True
      ...  
17    False
18    False
19    False
20    False
Name: director_name, Length: 21, dtype: bool

In [65]:
a = type(1)
print(a)  # type(1) is int, because 1 is an integer
type(a)  # type(a) is type: a holds the int class itself, and the int class is an instance of type

<class 'int'>


type

In [66]:
a = type(actual_left)
print(a)
a([1, 2, 3])  # a is the Series class, so calling it constructs a new Series

<class 'pandas.core.series.Series'>


0    1
1    2
2    3
dtype: int64

## 串联series方法

In [67]:
# 计数，查看前三
actual_left = df_report['实测宽度（mm）左']
actual_left.value_counts().head(3)

实测宽度（mm）左
-3101.0    3
-3043.0    2
-3047.0    2
Name: count, dtype: int64

In [68]:
# 统计缺失值的数量
actual_left.isnull().sum()

np.int64(1)

In [69]:
# 查看数据类型
actual_left.dtype


dtype('float64')

In [70]:
# Fill missing values with 0, cast to int, and show the first five rows
# (the chain is continued across lines with backslashes)
actual_left.fillna(0)\
           .astype(int)\
           .head()


0   -3101
1   -4003
2   -3043
3   -3105
4   -3998
Name: 实测宽度（mm）左, dtype: int64

In [71]:
# 缺失值的比例
actual_left.isnull().mean()

np.float64(0.047619047619047616)

In [72]:
# Same chain, wrapped in parentheses instead of using backslash continuations
(actual_left.fillna(0)
            .astype(int)
            .head())

0   -3101
1   -4003
2   -3043
3   -3105
4   -3998
Name: 实测宽度（mm）左, dtype: int64

## 使索引有意义

In [73]:
# set_index() promotes an existing column ('里程') to the row index and returns a
# new DataFrame; df_report itself is left unchanged.
df_report2 = df_report.set_index('里程')
df_report2

Unnamed: 0_level_0,测点位置,设计宽度（mm）左,实测宽度（mm）左,差值（mm）左,设计宽度（mm）右,实测宽度（mm）右,差值（mm）右,director_name
里程,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
63316.4,A1,-3082,-3101.0,-18.7,3082,3116,34.19,文件名称1
63316.4,A2,-4000,-4003.0,-2.6,4000,4035,35.03,文件名称2
63316.4,A3,-3082,-3043.0,38.9,3082,3092,10.50,文件名称3
63318.2,A1,-3082,-3105.0,-23.2,3082,3120,38.11,文件名称4
...,...,...,...,...,...,...,...,...
63325.4,A3,-3082,-3053.0,28.9,3082,3078,-3.95,文件名称18
63327.2,A1,-3082,-3100.0,-18.6,3082,3122,39.68,文件名称19
63327.2,A2,-4000,-3995.0,5.0,4000,4039,39.42,文件名称20
63327.2,A3,-3082,-3051.0,30.8,3082,3076,-6.33,文件名称21


In [74]:
# Alternatively, choose the index column while reading by passing index_col
# (the result is only displayed here, not assigned)
pd.read_csv('data.csv',
             encoding='gb2312',
             index_col="里程")

Unnamed: 0_level_0,测点位置,设计宽度（mm）左,实测宽度（mm）左,差值（mm）左,设计宽度（mm）右,实测宽度（mm）右,差值（mm）右,director_name
里程,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
63316.4,A1,-3082,-3101.0,-18.7,3082,3116,34.19,文件名称1
63316.4,A2,-4000,-4003.0,-2.6,4000,4035,35.03,文件名称2
63316.4,A3,-3082,-3043.0,38.9,3082,3092,10.50,文件名称3
63318.2,A1,-3082,-3105.0,-23.2,3082,3120,38.11,文件名称4
...,...,...,...,...,...,...,...,...
63325.4,A3,-3082,-3053.0,28.9,3082,3078,-3.95,文件名称18
63327.2,A1,-3082,-3100.0,-18.6,3082,3122,39.68,文件名称19
63327.2,A2,-4000,-3995.0,5.0,4000,4039,39.42,文件名称20
63327.2,A3,-3082,-3051.0,30.8,3082,3076,-6.33,文件名称21


In [75]:
# reset_index() moves the index back into an ordinary column and restores the
# default 0..n-1 integer index (the result is only displayed, not assigned)
df_report2.reset_index()

Unnamed: 0,里程,测点位置,设计宽度（mm）左,实测宽度（mm）左,...,设计宽度（mm）右,实测宽度（mm）右,差值（mm）右,director_name
0,63316.4,A1,-3082,-3101.0,...,3082,3116,34.19,文件名称1
1,63316.4,A2,-4000,-4003.0,...,4000,4035,35.03,文件名称2
2,63316.4,A3,-3082,-3043.0,...,3082,3092,10.50,文件名称3
3,63318.2,A1,-3082,-3105.0,...,3082,3120,38.11,文件名称4
...,...,...,...,...,...,...,...,...,...
17,63325.4,A3,-3082,-3053.0,...,3082,3078,-3.95,文件名称18
18,63327.2,A1,-3082,-3100.0,...,3082,3122,39.68,文件名称19
19,63327.2,A2,-4000,-3995.0,...,4000,4039,39.42,文件名称20
20,63327.2,A3,-3082,-3051.0,...,3082,3076,-6.33,文件名称21


## 重命名行名和列名

In [76]:
# Rename row and column labels with rename().
# Reload with '里程' as the index so there are meaningful row labels to rename.
df_report = pd.read_csv('data.csv',
                        encoding='gb2312',
                        index_col="里程")

# Mappings of old label -> new label; labels absent from a mapping stay unchanged.
idx_rename = {63316.4: '63316.4-1', 63318.2: '63318.2-1'}
# '里程' is now the index name rather than a column, so rename(columns=...) cannot
# touch it; the index name is changed with rename_axis() below instead.
col_rename = {'director_name': '文件名称'}

# Rename the index itself (its name, not its labels).
df_report = df_report.rename_axis('桩号1')

df_report2 = df_report.rename(index=idx_rename,
                              columns=col_rename).head()

df_report2

Unnamed: 0_level_0,测点位置,设计宽度（mm）左,实测宽度（mm）左,差值（mm）左,设计宽度（mm）右,实测宽度（mm）右,差值（mm）右,文件名称
桩号1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
63316.4-1,A1,-3082,-3101.0,-18.7,3082,3116,34.19,文件名称1
63316.4-1,A2,-4000,-4003.0,-2.6,4000,4035,35.03,文件名称2
63316.4-1,A3,-3082,-3043.0,38.9,3082,3092,10.5,文件名称3
63318.2-1,A1,-3082,-3105.0,-23.2,3082,3120,38.11,文件名称4
63318.2-1,A2,-4000,-3998.0,2.3,4000,4039,38.46,文件名称5


In [77]:
# Approach 2: convert the index and columns to plain lists, edit them, and assign them back

index = df_report.index
columns = df_report.columns

index_list = index.tolist()  # convert to a Python list
column_list = columns.tolist()

In [78]:
index_list

[63316.4,
 63316.4,
 63316.4,
 63318.2,
 63318.2,
 63318.2,
 63320.0,
 63320.0,
 63320.0,
 63321.8,
 63321.8,
 63321.8,
 63323.6,
 63323.6,
 63323.6,
 63325.4,
 63325.4,
 63325.4,
 63327.2,
 63327.2,
 63327.2]

In [79]:
column_list

['测点位置',
 '设计宽度（mm）左',
 '实测宽度（mm）左',
 '差值（mm）左',
 '设计宽度（mm）右',
 '实测宽度（mm）右',
 '差值（mm）右',
 'director_name']

In [80]:
# Lists are mutable, so individual labels can be replaced by position.
# NOTE(review): these labels look like placeholders unrelated to this dataset.
# Positions 1 and 2 of column_list are the left design/measured width columns, and a
# separate 'director_name' column already exists — confirm these names are intended.
index_list[0] = 'Ratava'
index_list[2] = 'Ertceps'
column_list[1] = 'Director Name'
column_list[2] = 'Critical Reviews'

In [81]:
index_list[:5]

['Ratava', 63316.4, 'Ertceps', 63318.2, 63318.2]

In [82]:
column_list

['测点位置',
 'Director Name',
 'Critical Reviews',
 '差值（mm）左',
 '设计宽度（mm）右',
 '实测宽度（mm）右',
 '差值（mm）右',
 'director_name']

In [83]:
# Assign the edited lists back; this relabels df_report in place
df_report.index = index_list
df_report.columns = column_list

In [84]:
df_report.head()

Unnamed: 0,测点位置,Director Name,Critical Reviews,差值（mm）左,设计宽度（mm）右,实测宽度（mm）右,差值（mm）右,director_name
Ratava,A1,-3082,-3101.0,-18.7,3082,3116,34.19,文件名称1
63316.4,A2,-4000,-4003.0,-2.6,4000,4035,35.03,文件名称2
Ertceps,A3,-3082,-3043.0,38.9,3082,3092,10.5,文件名称3
63318.2,A1,-3082,-3105.0,-23.2,3082,3120,38.11,文件名称4
63318.2,A2,-4000,-3998.0,2.3,4000,4039,38.46,文件名称5


## 创建、删除列

In [85]:
# Reload the data with the default integer index, discarding the relabelling above
df_report = pd.read_csv('data.csv',
             encoding='gb2312')
# Assigning a scalar to a new column name creates the column with that value in every row
df_report['has_seen'] = 0

In [86]:
df_report.columns

Index(['里程', '测点位置', '设计宽度（mm）左', '实测宽度（mm）左', '差值（mm）左', '设计宽度（mm）右',
       '实测宽度（mm）右', '差值（mm）右', 'director_name', 'has_seen'],
      dtype='object')

In [87]:
# Create a new column: total width = |left measurement| + right measurement.
# A row whose left measurement is NaN yields NaN, because value + NaN = NaN.
left_magnitude = df_report['实测宽度（mm）左'].abs()
df_report['水平宽度'] = left_magnitude.add(df_report['实测宽度（mm）右'])
# Number of rows in which the new column is missing
df_report['水平宽度'].isna().sum()

np.int64(1)

In [88]:
df_report['实测宽度（mm）左']

0    -3101.0
1    -4003.0
2    -3043.0
3    -3105.0
       ...  
17   -3053.0
18   -3100.0
19   -3995.0
20   -3051.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [89]:
df_report['实测宽度（mm）右']

0     3116
1     4035
2     3092
3     3120
      ... 
17    3078
18    3122
19    4039
20    3076
Name: 实测宽度（mm）右, Length: 21, dtype: int64

In [90]:
df_report['水平宽度']

0     6217.0
1     8038.0
2     6135.0
3     6225.0
       ...  
17    6131.0
18    6222.0
19    8034.0
20    6127.0
Name: 水平宽度, Length: 21, dtype: float64

In [91]:
# Replace the missing left measurement with 0 and write the column back into df_report.
# (all() is applied to the comparison built from this column a few cells further down.)
df_report['实测宽度（mm）左'] = df_report['实测宽度（mm）左'].fillna(0)
df_report['实测宽度（mm）左']

0    -3101.0
1    -4003.0
2    -3043.0
3    -3105.0
       ...  
17   -3053.0
18   -3100.0
19   -3995.0
20   -3051.0
Name: 实测宽度（mm）左, Length: 21, dtype: float64

In [92]:
# Element-wise test: is the magnitude of the left measurement at least the right measurement?
left_magnitude = df_report['实测宽度（mm）左'].abs()
df_report['is_left_large'] = left_magnitude.ge(df_report['实测宽度（mm）右'])
df_report['is_left_large']

0     False
1     False
2     False
3     False
      ...  
17    False
18    False
19    False
20    False
Name: is_left_large, Length: 21, dtype: bool

In [93]:
df_report['is_left_large'].all()  # True only if every element of the Series is truthy; a single False makes the result False

np.False_

In [94]:
df_report

Unnamed: 0,里程,测点位置,设计宽度（mm）左,实测宽度（mm）左,...,director_name,has_seen,水平宽度,is_left_large
0,63316.4,A1,-3082,-3101.0,...,文件名称1,0,6217.0,False
1,63316.4,A2,-4000,-4003.0,...,文件名称2,0,8038.0,False
2,63316.4,A3,-3082,-3043.0,...,文件名称3,0,6135.0,False
3,63318.2,A1,-3082,-3105.0,...,文件名称4,0,6225.0,False
...,...,...,...,...,...,...,...,...,...
17,63325.4,A3,-3082,-3053.0,...,文件名称18,0,6131.0,False
18,63327.2,A1,-3082,-3100.0,...,文件名称19,0,6222.0,False
19,63327.2,A2,-4000,-3995.0,...,文件名称20,0,8034.0,False
20,63327.2,A3,-3082,-3051.0,...,文件名称21,0,6127.0,False


In [95]:
# Remove the helper column 'is_left_large' and rebind df_report to the resulting frame.
# errors='ignore' keeps the cell re-runnable: on a second run the column is already
# gone and the call is a no-op instead of raising KeyError.
df_report = df_report.drop('is_left_large', axis='columns', errors='ignore')

In [96]:
df_report.columns.get_loc('里程')

0

In [97]:
# Work out where the new column should go: directly after '差值（mm）右'.
# The column itself is inserted in place with insert() in the following cell.
# NOTE(review): the name 'profit_index' does not describe this dataset; consider
# renaming it together with the insert() cell that uses it.
profit_index = df_report.columns.get_loc('差值（mm）右') + 1  # get_loc returns the integer position of a label in the index
profit_index

8

In [98]:
# insert() modifies df_report in place and raises ValueError when the column already
# exists, so guard the call to keep this cell safe to re-run.
if '差值' not in df_report.columns:
    df_report.insert(loc=profit_index,
                     column='差值',
                     value=df_report['差值（mm）左'] + df_report['差值（mm）右'])

In [99]:
df_report.head()

Unnamed: 0,里程,测点位置,设计宽度（mm）左,实测宽度（mm）左,...,差值,director_name,has_seen,水平宽度
0,63316.4,A1,-3082,-3101.0,...,15.49,文件名称1,0,6217.0
1,63316.4,A2,-4000,-4003.0,...,32.43,文件名称2,0,8038.0
2,63316.4,A3,-3082,-3043.0,...,49.4,文件名称3,0,6135.0
3,63318.2,A1,-3082,-3105.0,...,14.91,文件名称4,0,6225.0
4,63318.2,A2,-4000,-3998.0,...,40.76,文件名称5,0,8037.0
