In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from matplotlib import rcParams
# Matplotlib text settings, collected in `config` and pushed into rcParams.
# NOTE(review): Matplotlib documents "font.serif" as the candidate list used
# when "font.family" is 'serif'; with the family set to a concrete font name
# the SimSun entry may never be consulted -- confirm this is intended.
config = {}
config["font.family"] = 'Times New Roman'
config["mathtext.fontset"] = 'stix'
config["font.serif"] = ['SimSun']
rcParams.update(config)

# Suppress every warning for the rest of the session.
import warnings
warnings.simplefilter("ignore")

# Put ../src/ at the end of the module search path.
import sys
sys.path += ['../src/']

In [2]:
path = "F:/Data/Graduation/OBS_Seasons/"

# DESTRUCTIVE: permanently removes CSV files from the "城市*" sub-folders of
# `path`. A file survives when its date stamp (the text after the last "_"
# and before the first ".") falls in one of the kept months, or when the
# stamp cannot be parsed as numbers at all.
# NOTE(review): a later cell exports an October series, yet October files are
# removed here -- confirm the set of kept months before re-running.
kept_months = {7, 9}

for city_dir in os.listdir(path):
    if not city_dir.startswith("城市"):
        continue
    dir_path = os.path.join(path, city_dir)
    for csv_name in os.listdir(dir_path):
        if not csv_name.endswith(".csv"):
            continue
        stamp = csv_name.split("_")[-1].split(".")[0]
        try:
            # Year and day are parsed only to validate the stamp; a
            # non-numeric field means the file is left alone.
            int(stamp[:4])
            month_no = int(stamp[4:6])
            int(stamp[6:])
        except ValueError:
            continue
        if month_no in kept_months:
            continue
        # Delete files that do not meet the condition.
        os.remove(os.path.join(dir_path, csv_name))

In [2]:
# Folder that is scanned for the per-day CSV files.
path = "F:/Data/Graduation/OBS_Seasons/"

# City columns kept from each CSV and averaged later on.
cities = [
    '广州', '佛山', '东莞',
    '深圳', '中山', '珠海',
    '江门', '肇庆', '惠州',
]

# Full set of columns to retain: the three key columns followed by the cities.
columns = ['date', 'hour', 'type', *cities]

In [3]:
# Gather the 'O3_8h' rows for the selected columns from every CSV found in the
# "城市*" sub-folders of `path`, then stack them into a single frame `dfout`.
wanted = set(columns)  # constant-time membership test for the usecols filter
dfs = []

# os.listdir makes no ordering promise; sorting keeps the row order of the
# concatenated frame reproducible from run to run.
for foldername in sorted(os.listdir(path)):
    if not foldername.startswith("城市"):
        continue
    folder = os.path.join(path, foldername)
    for filename in sorted(os.listdir(folder)):
        # Anything that is not a CSV would make read_csv fail or return junk.
        if not filename.endswith(".csv"):
            continue
        # Parse only the needed columns instead of loading every column and
        # discarding most of them afterwards; file column order is preserved.
        df_sel = pd.read_csv(
            os.path.join(folder, filename),
            usecols=lambda name: name in wanted,
        )
        # A boolean mask simply yields no rows when a file lacks 'O3_8h',
        # whereas groupby('type').get_group('O3_8h') raises KeyError.
        O3_group = df_sel[df_sel['type'] == 'O3_8h']
        if not O3_group.empty:
            dfs.append(O3_group)

# Fail with a readable message instead of pandas' "No objects to concatenate".
if not dfs:
    raise ValueError(f"no 'O3_8h' rows found in any CSV under {path!r}")

dfout = pd.concat(dfs, ignore_index=True)
dfout

Unnamed: 0,date,hour,type,广州,深圳,珠海,佛山,中山,江门,东莞,惠州,肇庆
0,20140513,0,O3_8h,29.0,32.0,48.0,37.0,33.0,60.0,32.0,47.0,68.0
1,20140513,1,O3_8h,25.0,27.0,40.0,28.0,28.0,44.0,22.0,41.0,67.0
2,20140513,2,O3_8h,23.0,23.0,35.0,19.0,23.0,29.0,15.0,35.0,63.0
3,20140513,3,O3_8h,23.0,20.0,32.0,13.0,17.0,18.0,12.0,32.0,56.0
4,20140513,4,O3_8h,22.0,19.0,30.0,10.0,12.0,12.0,29.0,29.0,49.0
...,...,...,...,...,...,...,...,...,...,...,...,...
76915,20230520,19,O3_8h,99.0,69.0,59.0,89.0,68.0,69.0,76.0,81.0,89.0
76916,20230520,20,O3_8h,93.0,66.0,57.0,83.0,65.0,64.0,70.0,77.0,85.0
76917,20230520,21,O3_8h,85.0,61.0,54.0,74.0,61.0,59.0,65.0,71.0,81.0
76918,20230520,22,O3_8h,74.0,56.0,52.0,66.0,58.0,55.0,60.0,65.0,75.0


In [4]:
# Reshape the stacked rows into the analysis table: a datetime index, the
# city columns, and 'avg' = row-wise mean over the `cities` columns (pandas'
# default mean skips missing values).
# The chain below builds a new frame rather than mutating the existing one in
# place, and the timestamp is parsed with an explicit format so a malformed
# date/hour value raises instead of being guessed at.
dfout = (
    dfout
    .assign(
        avg=lambda frame: frame[cities].mean(axis=1),
        datetime=lambda frame: pd.to_datetime(
            frame['date'].astype(str) + 'T' + frame['hour'].astype(str).str.zfill(2),
            format='%Y%m%dT%H',
        ),
    )
    .drop(columns=['date', 'hour', 'type'])
    .set_index('datetime')
)
dfout

Unnamed: 0_level_0,广州,深圳,珠海,佛山,中山,江门,东莞,惠州,肇庆,avg
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-05-13 00:00:00,29.0,32.0,48.0,37.0,33.0,60.0,32.0,47.0,68.0,42.888889
2014-05-13 01:00:00,25.0,27.0,40.0,28.0,28.0,44.0,22.0,41.0,67.0,35.777778
2014-05-13 02:00:00,23.0,23.0,35.0,19.0,23.0,29.0,15.0,35.0,63.0,29.444444
2014-05-13 03:00:00,23.0,20.0,32.0,13.0,17.0,18.0,12.0,32.0,56.0,24.777778
2014-05-13 04:00:00,22.0,19.0,30.0,10.0,12.0,12.0,29.0,29.0,49.0,23.555556
...,...,...,...,...,...,...,...,...,...,...
2023-05-20 19:00:00,99.0,69.0,59.0,89.0,68.0,69.0,76.0,81.0,89.0,77.666667
2023-05-20 20:00:00,93.0,66.0,57.0,83.0,65.0,64.0,70.0,77.0,85.0,73.333333
2023-05-20 21:00:00,85.0,61.0,54.0,74.0,61.0,59.0,65.0,71.0,81.0,67.888889
2023-05-20 22:00:00,74.0,56.0,52.0,66.0,58.0,55.0,60.0,65.0,75.0,62.333333


In [5]:
# Write the whole `dfout` table to an Excel workbook.
dfout.to_excel(excel_writer='D:/Download/O3_years.xlsx')

In [8]:
def _export_month_avg(frame, month, tag):
    """Save one calendar month of the 'avg' column to Excel and return it.

    Parameters
    ----------
    frame : DataFrame with a DatetimeIndex and an 'avg' column.
    month : calendar month number (1-12) to select, across all years.
    tag   : suffix used in the output file name, e.g. 'jul'.

    Returns
    -------
    The selected 'avg' Series (same index as the matching rows of `frame`).
    """
    # Compare the integer month directly instead of formatting every
    # timestamp to a string first.
    monthly = frame.loc[frame.index.month == month, 'avg']
    monthly.to_excel(f'D:/Download/O3_years_{tag}.xlsx')
    return monthly


df_jul = _export_month_avg(dfout, 7, 'jul')
df_sep = _export_month_avg(dfout, 9, 'sep')
df_oct = _export_month_avg(dfout, 10, 'oct')