# Pandas的SettingWithCopyWarning报警

## 0、读取数据

In [12]:
import pandas as pd

In [16]:
# Load the 2018 Beijing weather records; the file is decoded as GBK.
fpath = "./datas/beijing_tianqi/beijing_tianqi_2018.csv"
df = pd.read_csv(
    filepath_or_buffer=fpath,
    encoding="gbk",
)

In [17]:
# Preview the first five raw rows.
df.head(n=5)

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel
0,2018-01-01,3℃,-6℃,晴~多云,东北风,1-2级,59,良,2
1,2018-01-02,2℃,-5℃,阴~多云,东北风,1-2级,49,优,1
2,2018-01-03,2℃,-5℃,多云,北风,1-2级,28,优,1
3,2018-01-04,0℃,-8℃,阴,东北风,1-2级,28,优,1
4,2018-01-05,3℃,-6℃,多云~晴,西北风,1-2级,50,优,1


In [18]:
# Strip the trailing "℃" from both temperature columns and store them as int32.
#
# Assign with df[col] = ... instead of df.loc[:, col] = ...: since pandas 2.0
# the .loc[:, col] form writes the new values into the existing column in
# place, so the column would keep its original `object` dtype rather than
# becoming int32.
#
# .astype(str) keeps the cell re-runnable (a second run sees int columns, which
# have no .str accessor); regex=False states that "℃" is a literal, not a pattern.
for col in ("bWendu", "yWendu"):
    df[col] = (
        df[col]
        .astype(str)
        .str.replace("℃", "", regex=False)
        .astype("int32")
    )

In [19]:
# Preview the first five rows after the temperature columns were converted.
df.head(n=5)

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel
0,2018-01-01,3,-6,晴~多云,东北风,1-2级,59,良,2
1,2018-01-02,2,-5,阴~多云,东北风,1-2级,49,优,1
2,2018-01-03,2,-5,多云,北风,1-2级,28,优,1
3,2018-01-04,0,-8,阴,东北风,1-2级,28,优,1
4,2018-01-05,3,-6,多云~晴,西北风,1-2级,50,优,1


## 1、复现

In [20]:
# Boolean mask selecting only the March 2018 rows (ymd starts with "2018-03").
condition = df.loc[:, "ymd"].str.startswith("2018-03")

In [21]:
# Add a temperature-difference column.
# NOTE: this is intentionally written as chained indexing to reproduce
# SettingWithCopyWarning; do not "fix" this cell. df[condition] returns an
# intermediate object and the assignment is applied to that object, not to df.
df[condition]["wen_cha"] = df["bWendu"]-df["yWendu"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[condition]["wen_cha"] = df["bWendu"]-df["yWendu"]


In [23]:
# Check whether the assignment above actually took effect on the March rows.
df.loc[condition].head()
# When SettingWithCopyWarning is raised, the modification sometimes succeeds
# and sometimes does not.

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel
17,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1
18,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2
19,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5
20,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3
21,2018-03-05,8,-3,晴,南风,1-2级,94,良,2


## 2、原因
 #### 发出警告的代码 `df[condition]["wen_cha"] = df["bWendu"]-df["yWendu"]`
 #### 相当于：`df.get(condition).set(wen_cha)`，报警是在对第一步get返回的结果执行set时发出的
 #### 链式操作其实是两个步骤，先get后set；get得到的dataframe可能是view，也可能是copy，所以pandas发出警告（view是df的子视图，修改会直接影响源df；copy是复制版本，修改对源df没有影响）。
 #### 核心要诀：pandas的dataframe的修改写操作，只允许在源dataframe上进行，一步到位（不能先筛选get，再进行set）。

## 3、解决方法1
 #### 将get+set的两步操作，改成set的一步操作

In [24]:
# Single-step write on the source frame: row mask and target column go into one
# .loc call; the right-hand Series is aligned to the selected rows by index.
df.loc[condition, "wencha"] = df["bWendu"].sub(df["yWendu"])

In [25]:
# Show the March rows, now including the new "wencha" column.
df.loc[condition].head()

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel,wencha
17,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1,11
18,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2,10
19,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5,10
20,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3,9
21,2018-03-05,8,-3,晴,南风,1-2级,94,良,2,11


## 4、解决方法2
#### 如果需要预筛选数据做后续的处理分析，使用copy复制dataframe

In [27]:
# .copy() yields a new, independent DataFrame holding only the March rows.
df_month3 = df.loc[condition].copy()

In [28]:
# Preview the first five rows of the copied March subset.
df_month3.head(n=5)

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel,wencha
17,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1,11
18,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2,10
19,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5,10
20,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3,9
21,2018-03-05,8,-3,晴,南风,1-2级,94,良,2,11


In [29]:
# Plain column assignment on the copied frame; the full-length difference
# Series is aligned to df_month3's index on assignment.
df_month3["wencha"] = df["bWendu"].sub(df["yWendu"])

In [31]:
df_month3.head(n=5)  # no warning, and the assignment succeeded

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel,wencha
17,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1,11
18,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2,10
19,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5,10
20,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3,9
21,2018-03-05,8,-3,晴,南风,1-2级,94,良,2,11


## 总之，pandas不建议先筛选出子dataframe，再对其进行修改写入（写入可能不会作用到源dataframe）
#### 要么使用.loc实现一个步骤直接修改源dataframe
#### 要么先复制一个子dataframe再一个步骤执行修改