# 毕业论文`GRAD`
## 建立随机森林模型，定量评估影响

*`Evan`*\
*`2023-12-21`*

---

In [1]:
import os
import sys
import warnings

import numpy as np
import pandas as pd

sys.path.append('../../../src/')
from namelist import *
from RandomForest import read_data, rf_importance

# silence the warning note
warnings.filterwarnings("ignore")

In [5]:
# Years passed to read_data for every city.
years = [2019, 2021, 2022]

# Cities processed by the notebook; one importance table is produced per entry.
regions = [
    'Zhaoqing',
    'Huizhou',
    'Guangzhou',
    'Foshan',
    'Dongguan',
    'Shenzhen',
    'Zhongshan',
    'Jiangmen',
    'Zhuhai',
]

# Input directory. `datadir` is not defined in this notebook
# (presumably it comes from the `namelist` star import -- TODO confirm).
datapath = datadir + 'Contribution/Seasonally/data/'

# Predictor columns and the target column handed to rf_importance.
# Currently excluded candidates: 'NO2', 'VOC', 'PM25', 'ISOP', 'PBLH'.
variants = [
    'SFC_TMP',
    'SOL_RAD',
    'QV',
    'PRES',
    'WSPD10',
    'WDIR10',
    'CloudFRAC',
]
target = 'O3'

## 读取数据，建立模型

In [6]:
# Directory that receives one workbook per city. It does not depend on the
# loop variable, so it is built once, and it is created up front so that a
# fresh run does not fail inside to_excel when the folder is missing.
filepath = 'D:/data/Graduation/Contribution/Seasonally/RandomForest_output/TMP/'
os.makedirs(filepath, exist_ok=True)

for region in regions:
    # Load the 'Jul' (summer) and 'Sep' (autumn) data for this city.
    df_summer = read_data(years,'Jul',region,datapath)
    df_autumn = read_data(years,'Sep',region,datapath)

    # Importance of each predictor within each season.
    summer_importance = rf_importance(df_summer,variants,target)
    autumn_importance = rf_importance(df_autumn,variants,target)

    # Renumber rows 0..n-1 so the subtraction below pairs rows by position
    # instead of by whatever index read_data returned.
    df_summer = df_summer.reset_index(drop=True)
    df_autumn = df_autumn.reset_index(drop=True)

    # Autumn minus summer. The two frames differ in length (the original note
    # says one season has an extra day); rows without a partner come out
    # all-NaN after alignment and are dropped.
    df_diff = (df_autumn - df_summer).dropna(how='all')
    diff_importance = rf_importance(df_diff,variants,target)

    # Put the three results side by side and label the columns.
    dfoutput = pd.concat([summer_importance,autumn_importance,diff_importance],axis=1)
    dfoutput.columns=['summer','autumn','Diff']

    filename = f'RF_{region}.xlsx'
    dfoutput.to_excel(filepath + filename,index=True)
    print(f'{region}')

Zhaoqing
Huizhou
Guangzhou
Foshan
Dongguan
Shenzhen
Zhongshan
Jiangmen
Zhuhai


## 读取各城市的输出数据，整合到同一个 Excel 工作簿中（每个城市一个工作表）

In [7]:
# Collect the per-city workbooks into a single workbook, one sheet per city.
infilepath = 'D:/data/Graduation/Contribution/Seasonally/RandomForest_output/TMP/'
outfilepath = 'D:/data/Graduation/Contribution/Seasonally/RandomForest_output/'

# NOTE: despite its name, `dfs` holds the per-city workbook paths (in the same
# order as `regions`), not DataFrames. The name is kept for compatibility.
dfs = [infilepath + f'RF_{region}.xlsx' for region in regions]

# The context manager closes the writer even if reading or writing one of the
# cities raises, so the output file handle is never left open.
with pd.ExcelWriter(outfilepath + 'RF_cities.xlsx', engine='xlsxwriter') as writer:
    for region, path in zip(regions, dfs):
        # The first column of each per-city file is the saved index.
        df = pd.read_excel(path, index_col=0)
        df.to_excel(writer, sheet_name=region, index=True)