# 毕业论文`GRAD`
## 建立随机森林模型，定量评估影响

*`Evan`*\
*`2023-11-16`*

---

In [1]:
import numpy as np
import pandas as pd

import sys
sys.path.append('../../../src/')
from namelist import *
from RandomForest import read_data, rf_importance

# silence the warning note
import warnings
warnings.filterwarnings("ignore")

In [2]:
# The two groups of years that are compared, and the month read for each.
lowyears = [2014, 2015, 2016]
highyears = [2019, 2021, 2022]
month = 'Sep'

# Cities that are processed one at a time.
regions = [
    'Zhaoqing', 'Huizhou', 'Guangzhou',
    'Foshan', 'Dongguan', 'Shenzhen',
    'Zhongshan', 'Jiangmen', 'Zhuhai',
]

# Input folder handed to read_data.
datapath = datadir + 'Contribution/data/'

# Predictor list and target variable.
variants = [
    'SFC_TMP',
    'SOL_RAD',
    'QV',
    'PRES',
    'WSPD10',
    'WDIR10',
    'CloudFRAC',
]
# Currently disabled predictors: 'NO2', 'VOC', 'PM25', 'ISOP', 'PBLH'
target = 'O3'

## 读取数据，建立模型

In [3]:
# Directory that receives one importance workbook per city. It is the same for
# every iteration, so it is defined once instead of inside the loop.
filepath = 'D:/data/Graduation/Contribution/RandomForest_output/TMP/'

for region in regions:
    # Load the samples of both year groups for this city.
    df_low = read_data(lowyears, month, region, datapath)
    df_high = read_data(highyears, month, region, datapath)

    low_importance = rf_importance(df_low, variants, target)
    high_importance = rf_importance(df_high, variants, target)

    # Give both frames a fresh 0..n-1 index so the subtraction below pairs rows
    # by position. drop=True discards the old index directly (no temporary
    # 'index' column, no in-place drop) and also works when the index is named.
    df_low = df_low.reset_index(drop=True)
    df_high = df_high.reset_index(drop=True)

    # NOTE(review): DataFrame subtraction aligns on index labels, so if the two
    # frames have a different number of rows the unmatched rows become NaN.
    # Confirm read_data returns equally long frames for both year groups.
    df_diff = df_high - df_low
    diff_importance = rf_importance(df_diff, variants, target)

    # Put the three results side by side and label the columns.
    dfoutput = pd.concat([low_importance, high_importance, diff_importance], axis=1)
    dfoutput.columns = ['Low', 'High', 'Diff']

    filename = f'RF_{region}.xlsx'
    dfoutput.to_excel(filepath + filename, index=True)
    print(region)  # progress marker: one line per finished city

Zhaoqing
Huizhou
Guangzhou
Foshan
Dongguan
Shenzhen
Zhongshan
Jiangmen
Zhuhai


## 读取输出数据，整合到一张表中

In [4]:
# Gather the per-city workbooks into a single workbook, one sheet per city.
infilepath = 'D:/data/Graduation/Contribution/RandomForest_output/TMP/'
# Alternative destination (disabled):
# outfilepath = 'D:/data/Graduation/Contribution/RandomForest_output/'
outfilepath = 'D:/data/Graduation/Contribution/RandomForest_output/RH_to_QV/'

# Paths of the per-city input workbooks, in the same order as `regions`.
dfs = [infilepath + f'RF_{region}.xlsx' for region in regions]

# The context manager saves and closes the workbook even if reading or writing
# one of the sheets raises, so no half-written file handle is left open.
with pd.ExcelWriter(outfilepath + 'RF_cities.xlsx', engine='xlsxwriter') as writer:
    for region, path in zip(regions, dfs):
        # index_col=0 restores the row labels that were written with index=True.
        df = pd.read_excel(path, index_col=0)
        df.to_excel(writer, sheet_name=region, index=True)