# 毕业论文`GRAD`
## 建立随机森林模型，定量评估各影响因子的重要性

*`Evan`*\
*`2023-11-16`*

---

In [1]:
import numpy as np
import pandas as pd

# Make the project's src directory importable. The path is relative, so it
# resolves against the kernel's current working directory.
import sys
sys.path.append('../../src/')
# NOTE(review): the star import hides where names come from. `datadir`, used
# in the configuration cell, is not defined in this notebook, so it presumably
# arrives through this import — confirm against namelist.py.
from namelist import *
from RandomForest import read_data, rf_importance

# Suppress every warning category for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Two groups of years that are compared against each other, and the month
# that is read for every one of those years.
lowyears = [2014, 2015, 2016]
highyears = [2019, 2021, 2022]
month = 'Sep'

# Cities handled one at a time by the loops further down.
regions = [
    'Zhaoqing', 'Huizhou', 'Guangzhou',
    'Foshan', 'Dongguan', 'Shenzhen',
    'Zhongshan', 'Jiangmen', 'Zhuhai',
]

# Input directory handed to read_data. `datadir` is not defined in this
# notebook; presumably it comes from the namelist star import — confirm.
datapath = datadir + 'Contribution/data/'

# Variable list and target variable. Both are passed to rf_importance together
# with each data frame (presumably as feature columns and response column —
# confirm against RandomForest.py).
# Not currently included in the list: 'NO2', 'VOC', 'PM25', 'ISOP', 'PBLH'.
target = 'O3'
variants = [
    'SFC_TMP',
    'SOL_RAD',
    'RH',
    'PRES',
    'WSPD10',
    'WDIR10',
    'CloudFRAC',
]

# 白天

In [3]:
# Daytime importance table for every city.
#
# For each region: read both year groups, keep the 08:00-17:00 records
# (between_time keeps both end points by default), call rf_importance on each
# group, then call it once more on the row-by-row difference between the two
# groups. The three results are written side by side to one workbook per city.

# Output directory for the per-city intermediate workbooks; it does not depend
# on the region, so it is set once outside the loop.
filepath = 'D:/data/Graduation/Contribution/RandomForest_output/TMP/'

for region in regions:
    print(region)

    df_low = read_data(lowyears, month, region, datapath)
    df_high = read_data(highyears, month, region, datapath)

    day_low = df_low.between_time('8:00', '17:00')
    day_high = df_high.between_time('8:00', '17:00')

    low_importance = rf_importance(day_low, variants, target)
    high_importance = rf_importance(day_high, variants, target)

    # Swap the time index for a 0..n-1 range so the subtraction below pairs
    # rows by position. drop=True discards the old index directly instead of
    # turning it into a column and deleting it by the assumed name 'index',
    # which breaks when the index is named or an 'index' column already exists.
    day_low = day_low.reset_index(drop=True)
    day_high = day_high.reset_index(drop=True)

    # NOTE(review): rows are matched purely by position. If the two groups
    # have different lengths, the unmatched trailing rows become all-NaN —
    # confirm that rf_importance handles this as intended.
    day_diff = day_high - day_low
    diff_importance = rf_importance(day_diff, variants, target)

    # Combine the three results and rename the columns.
    dfoutput = pd.concat([low_importance, high_importance, diff_importance], axis=1)
    dfoutput.columns = ['Low', 'High', 'Diff']

    filename = f'RF_{region}_day.xlsx'
    dfoutput.to_excel(filepath + filename, index=True)

Zhaoqing
Huizhou
Guangzhou
Foshan
Dongguan
Shenzhen
Zhongshan
Jiangmen
Zhuhai


读取各城市的输出文件，整合到同一个 Excel 工作簿中（每个城市一个工作表）

In [4]:
# Collect the per-city daytime workbooks into a single workbook with one
# sheet per city, named after the city.
infilepath = 'D:/data/Graduation/Contribution/RandomForest_output/TMP/'
outfilepath = 'D:/data/Graduation/Contribution/RandomForest_output/'

# The context manager closes the writer even when reading one of the inputs
# raises, so the output file handle is never left open.
with pd.ExcelWriter(outfilepath + 'RF_cities_day.xlsx', engine='xlsxwriter') as writer:
    for region in regions:
        df = pd.read_excel(infilepath + f'RF_{region}_day.xlsx', index_col=0)
        df.to_excel(writer, sheet_name=region, index=True)

# 夜间

In [7]:
# Night-time importance table for every city.
#
# For each region: read both year groups, keep the 18:00-07:00 records (the
# start is later than the end, so between_time selects the span that wraps
# past midnight, end points included by default), call rf_importance on each
# group, then call it once more on the row-by-row difference between the two
# groups. The three results are written side by side to one workbook per city.

# Output directory for the per-city intermediate workbooks; it does not depend
# on the region, so it is set once outside the loop.
filepath = 'D:/data/Graduation/Contribution/RandomForest_output/TMP/'

for region in regions:
    print(region)

    df_low = read_data(lowyears, month, region, datapath)
    df_high = read_data(highyears, month, region, datapath)

    night_low = df_low.between_time('18:00', '7:00')
    night_high = df_high.between_time('18:00', '7:00')

    low_importance = rf_importance(night_low, variants, target)
    high_importance = rf_importance(night_high, variants, target)

    # Swap the time index for a 0..n-1 range so the subtraction below pairs
    # rows by position. drop=True discards the old index directly instead of
    # turning it into a column and deleting it by the assumed name 'index',
    # which breaks when the index is named or an 'index' column already exists.
    night_low = night_low.reset_index(drop=True)
    night_high = night_high.reset_index(drop=True)

    # NOTE(review): rows are matched purely by position. If the two groups
    # have different lengths, the unmatched trailing rows become all-NaN —
    # confirm that rf_importance handles this as intended.
    night_diff = night_high - night_low
    diff_importance = rf_importance(night_diff, variants, target)

    # Combine the three results and rename the columns.
    dfoutput = pd.concat([low_importance, high_importance, diff_importance], axis=1)
    dfoutput.columns = ['Low', 'High', 'Diff']

    filename = f'RF_{region}_night.xlsx'
    dfoutput.to_excel(filepath + filename, index=True)

Zhaoqing
Huizhou
Guangzhou
Foshan
Dongguan
Shenzhen
Zhongshan
Jiangmen
Zhuhai


In [8]:
# Collect the per-city night-time workbooks into a single workbook with one
# sheet per city, named after the city.
infilepath = 'D:/data/Graduation/Contribution/RandomForest_output/TMP/'
outfilepath = 'D:/data/Graduation/Contribution/RandomForest_output/'

# The context manager closes the writer even when reading one of the inputs
# raises, so the output file handle is never left open.
with pd.ExcelWriter(outfilepath + 'RF_cities_night.xlsx', engine='xlsxwriter') as writer:
    for region in regions:
        df = pd.read_excel(infilepath + f'RF_{region}_night.xlsx', index_col=0)
        df.to_excel(writer, sheet_name=region, index=True)