## 1. 导入必要的库

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## 2. 读取数据

使用 pandas 库加载 CSV 文件：

In [12]:
# 读取数据
data = pd.read_csv('../data/air/城市_20220101-20221231/china_cities_20220103.csv')

# 查看数据前5行
data.head()

Unnamed: 0,date,hour,type,北京,天津,石家庄,唐山,秦皇岛,邯郸,保定,...,塔城地区,阿勒泰地区,石河子,五家渠,三沙,兰州新区,赣江新区,儋州,雄安新区,西咸新区
0,20220103,0,AQI,46.0,64.0,64.0,67.0,59.0,70.0,58.0,...,23.0,18.0,256.0,350.0,,67.0,79.0,63.0,70.0,218.0
1,20220103,0,PM2.5,23.0,29.0,34.0,34.0,32.0,51.0,34.0,...,16.0,10.0,206.0,300.0,,40.0,58.0,45.0,42.0,168.0
2,20220103,0,PM2.5_24h,13.0,14.0,28.0,16.0,21.0,45.0,17.0,...,17.0,5.0,163.0,272.0,,51.0,58.0,31.0,15.0,93.0
3,20220103,0,PM10,46.0,77.0,77.0,84.0,68.0,84.0,65.0,...,23.0,2.0,345.0,442.0,,84.0,108.0,59.0,89.0,161.0
4,20220103,0,PM10_24h,29.0,36.0,56.0,40.0,46.0,73.0,41.0,...,31.0,8.0,269.0,421.0,,91.0,105.0,48.0,47.0,116.0


## 3. 基本数据统计分析

通过 describe() 方法可以生成数据的基本统计信息，包括均值、标准差、最小值、最大值等：

In [13]:
# 基本统计分析
data_statistics = data.describe()
print(data_statistics)

             date        hour          北京          天津         石家庄          唐山  \
count       360.0  360.000000  360.000000  360.000000  360.000000  360.000000   
mean   20220103.0   11.500000   25.527889   33.592472   34.151056   35.402278   
std           0.0    6.931821   19.578860   26.643498   29.092878   28.944640   
min    20220103.0    0.000000    0.320000    0.550000    0.420000    0.670000   
25%    20220103.0    5.750000    3.000000    8.000000    8.000000    8.000000   
50%    20220103.0   11.500000   24.000000   33.500000   30.500000   33.000000   
75%    20220103.0   17.250000   44.000000   56.250000   57.250000   58.250000   
max    20220103.0   23.000000   64.000000  110.000000  135.000000  120.000000   

              秦皇岛          邯郸          保定         张家口  ...        塔城地区  \
count  360.000000  360.000000  360.000000  360.000000  ...  356.000000   
mean    35.156972   43.423611   35.314306   28.639056  ...   29.346910   
std     23.246978   33.362452   28.475989   21.9

## 4. 处理缺失值

缺失数据（NaN 值）在环境监测数据中比较常见。可以选择用列的均值填充缺失值：

In [14]:
# 用列的均值填充缺失值
data_filled = data.fillna(data.mean())

# 检查填充后的数据
data_filled.isnull().sum()  # 查看是否还有缺失值

TypeError: Could not convert ['AQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24hAQIPM2.5PM2.5_24hPM10PM10_24hSO2SO2_24hNO2NO2_24hO3O3_24hO3_8hO3_8h_24hCOCO_24h'] to numeric