In [1]:
from urllib import request
from shutil import unpack_archive
import pandas as pd
from datetime import datetime
import numpy as np

In [None]:
def ExtractTaichungData(filenm): 
    """
    Load one CSV file and keep only its PM2.5 rows.

    The path is built by concatenating the data directory prefix, the
    module-level name ``YM`` and ``filenm``; ``YM`` must already be defined
    when this function is called.

    NOTE(review): despite the function name, no site/city filtering is done
    here -- the only filter is on the POLLUTANT column. Confirm whether a
    Taichung filter is still wanted.

    Inputs:
        filenm: file name without the ".csv" extension.
    Output: a Pandas DataFrame (long table) holding the rows whose POLLUTANT
        value contains "PM25", re-indexed from 0.
    """
    csv_path = "/home/shuanjeng/下載/Data/中研院"+YM+filenm+".csv"
    raw = pd.read_csv(csv_path)
    is_pm25 = raw["POLLUTANT"].str.contains("PM25")
    return raw[is_pm25].reset_index(drop = True)

def DailySummary(df): 
    """
    Pivot a long table into a wide table and average each row.

    The input is pivoted to one row per (device_id, SiteName) pair and one
    column per timestamp, then every row is averaged while ignoring missing
    values.

    Inputs:
        df: a Pandas DataFrame (long table) with the columns device_id,
            SiteName, timestamp and PM25.
    Output: a Pandas DataFrame with the columns
        station_id -- the device_id of each row of the wide table,
        SiteName   -- the matching site name,
        PM25       -- the row mean of PM25 over all timestamps,
        timestamp  -- the first 10 characters of the first input row's
                      timestamp (presumably the YYYY-MM-DD date part;
                      confirm against the timestamp format of the data).
        An empty input yields an empty DataFrame with these four columns.
    """
    columns = ["station_id", "SiteName", "PM25", "timestamp"]
    if len(df) == 0:
        return pd.DataFrame(columns=columns)

    wide_df = pd.pivot_table(df, index=['device_id','SiteName'], columns='timestamp', values='PM25')

    # Read each index level directly: routing the index tuples through a NumPy
    # array would coerce mixed-type levels (e.g. integer ids) to strings.
    station_ids = wide_df.index.get_level_values(0).tolist()
    site_names = wide_df.index.get_level_values(1).tolist()

    # Positional access, so the input does not need a row labelled 0.
    day = str(df["timestamp"].iloc[0])[:10]

    final_df = pd.DataFrame({
        "station_id": station_ids,
        "SiteName": site_names,
        "PM25": wide_df.mean(axis=1).tolist(),  # NaN values are skipped by default
        "timestamp": [day] * len(wide_df.index),
    }, columns=columns)
    return final_df

def MergeData(df, station_csv="D:\\data_s\\HW2\\iis_airbox_station.csv"): 
    """
    Join station metadata onto a daily summary table.

    Inputs:
        df: a Pandas DataFrame (from DailySummary) with a station_id column.
        station_csv: path of the station metadata CSV. It must contain the
            columns station_id and station_address; presumably it also holds
            the station latitude/longitude (verify against the file). The
            default is the location that used to be hard-coded here.
    Output: the inner join of df and the station table on station_id, with
        the station_address and station_id columns removed. Rows of df whose
        station_id is absent from the station table are dropped by the join.
    Raises: KeyError if station_address is missing from the merged table.
    """
    station_df = pd.read_csv(station_csv)
    output_df = pd.merge(df, station_df, how = 'inner', on = 'station_id')
    # Drop the columns that are not needed downstream.
    return output_df.drop(columns=["station_address", "station_id"])

def main(): 
    """
    Download the 30 daily files of 2021-09, summarise each day and write the
    combined result to a JSON file.

    For every day the file is filtered (ExtractTaichungData), reduced to one
    mean per station (DailySummary) and joined with the station metadata
    (MergeData); the 30 daily tables are then stacked and exported.

    NOTE(review): RetriveData is not defined in the visible part of this
    notebook -- confirm it is available before calling main().
    """
    # Download the 30 daily files first.
    url = "https://ci.taiwan.gov.tw/dsp/history/iis_airbox/202109/" 
    filename = ["iis_airbox_202109" + "{:02d}".format(day) for day in range(1, 31)]
    RetriveData(url, filename)

    # Build one merged table per day. The frames are collected in a list
    # rather than looked up as df1, df2, ... in globals(): those globals were
    # never assigned (the assignment sat inside a comment), so the lookup
    # raised at run time.
    daily_frames = []
    for name in filename:
        l_df = ExtractTaichungData(name)
        daily_df = DailySummary(l_df)
        daily_frames.append(MergeData(daily_df))

    # Stack all days with a single concat and export.
    json_df = pd.concat(daily_frames, ignore_index = True)
    json_df.to_json("D:\\data_s\\HW2\\202109.json")

# 中研院

In [2]:
def ExtractData(filenm): 
    """
    Load one daily CSV from the 中研院 data directory and filter it.

    Inputs:
        filenm: path of the file relative to the data directory, without the
            ".csv" extension.
    Output: a Pandas DataFrame holding the rows whose POLLUTANT value contains
        "PM25", minus every row whose SITE_ID equals "EPA075", re-indexed
        from 0.
    """
    raw = pd.read_csv("/home/shuanjeng/下載/Data/中研院/"+filenm+".csv")
    pm25 = raw[raw["POLLUTANT"].str.contains("PM25")]
    # Index labels of the rows that belong to the excluded site.
    excluded = pm25[pm25["SITE_ID"]=="EPA075"].index
    return pm25.drop(excluded).reset_index(drop = True)

In [3]:
# Days of 2021 (January to November) to load, as
# (month, [(first_day, last_day), ...]) with inclusive day ranges.
# The gaps (Feb 26-28, May 2, May 19-20, Aug 27, everything after Nov 2) are
# deliberate skips -- presumably no file exists for those days.
RCEC_2021_DAY_RANGES = [
    (1, [(1, 31)]),
    (2, [(1, 25)]),
    (3, [(1, 31)]),
    (4, [(1, 30)]),
    (5, [(1, 1), (3, 18), (21, 31)]),
    (6, [(1, 30)]),
    (7, [(1, 31)]),
    (8, [(1, 26), (28, 31)]),
    (9, [(1, 30)]),
    (10, [(1, 31)]),
    (11, [(1, 2)]),
]

# One entry per day, shaped "2021MM/output_RCEC_2021MMDD00" (relative path
# without extension), in chronological order.
filename2 = [
    "2021{0:02d}/output_RCEC_2021{0:02d}{1:02d}00".format(month, day)
    for month, day_ranges in RCEC_2021_DAY_RANGES
    for first_day, last_day in day_ranges
    for day in range(first_day, last_day + 1)
]

# Days of 2020 to load, as (month, [(first_day, last_day), ...]) with
# inclusive day ranges. The gaps (May 10, May 28, Oct 5) are deliberate
# skips -- presumably no file exists for those days.
RCEC_2020_DAY_RANGES = [
    (1, [(1, 31)]),
    (2, [(1, 29)]),
    (3, [(1, 31)]),
    (4, [(1, 30)]),
    (5, [(1, 9), (11, 27), (29, 31)]),
    (6, [(1, 30)]),
    (7, [(1, 31)]),
    (8, [(1, 31)]),
    (9, [(1, 30)]),
    (10, [(1, 4), (6, 31)]),
    (11, [(1, 30)]),
    (12, [(1, 31)]),
]

# One entry per day, shaped "2020/output_RCEC_2020MMDD00" (relative path
# without extension), in chronological order.
filename1 = [
    "2020/output_RCEC_2020{0:02d}{1:02d}00".format(month, day)
    for month, day_ranges in RCEC_2020_DAY_RANGES
    for first_day, last_day in day_ranges
    for day in range(first_day, last_day + 1)
]

In [4]:
# Full load list: every 2020 entry (filename1) followed by every 2021 entry
# (filename2), as a new list.
filename = [*filename1, *filename2]

In [5]:
# Sanity check: number of entries queued in the load list.
len(filename)

662

In [6]:
# Load and filter every file in the load list, then stack all of them with a
# single concat (re-concatenating inside the loop copies the accumulated rows
# on every iteration).
frames = [ExtractData(name) for name in filename]
csv_df = pd.concat(frames, ignore_index = True)
# The index is written as the first, unnamed column of the CSV.
csv_df.to_csv("/home/shuanjeng/下載/Data/中研院/sinica.csv")

In [None]:
# Load every daily file, keep each filtered frame reachable as a global
# df1, df2, ... (1-based position in the load list) for manual inspection,
# and write the stacked result to disk.
frames = []
for position, name in enumerate(filename, start=1):
    data = ExtractData(name)
    globals()['df'+str(position)] = data
    frames.append(data)
# A single concat instead of one per iteration; the result is a new frame,
# so editing csv_df does not touch df1.
csv_df = pd.concat(frames, ignore_index = True)
csv_df.to_csv("/home/shuanjeng/下載/Data/中研院/sinica.csv")

In [7]:
# Read the combined CSV back from disk and display its (rows, columns) shape.
df = pd.read_csv("/home/shuanjeng/下載/Data/中研院/sinica.csv")
df.shape

(3624450, 7)

In [8]:
# Row-count audit: print "<position> <row count>" for every file whose
# filtered frame does not have exactly the expected number of rows.
expected_rows = 5475
for idx, name in enumerate(filename):
    row_count = len(ExtractData(name))
    if row_count != expected_rows:
        print(idx, row_count)

# 中央大氣

In [22]:
def ExtractData(filenm): 
    """
    Load one daily CSV from the 中央大氣 data directory, unfiltered.

    NOTE(review): this reuses the name of the ExtractData defined earlier in
    the notebook and replaces it from this cell onwards.

    Inputs:
        filenm: file name without the ".csv" extension.
    Output: the file contents as a Pandas DataFrame indexed from 0.
    """
    csv_path = "/home/shuanjeng/下載/Data/中央大氣/"+filenm+".csv"
    return pd.read_csv(csv_path).reset_index(drop = True)

In [23]:
# Days of 2020 to load, as (month, [(first_day, last_day), ...]) with
# inclusive day ranges. The gaps (Aug 6, Oct 5, everything after Oct 7) are
# deliberate skips -- presumably no file exists for those days.
CMAQ_2020_DAY_RANGES = [
    (1, [(1, 31)]),
    (2, [(1, 29)]),
    (3, [(1, 31)]),
    (4, [(1, 30)]),
    (5, [(1, 31)]),
    (6, [(1, 30)]),
    (7, [(1, 31)]),
    (8, [(1, 5), (7, 31)]),
    (9, [(1, 30)]),
    (10, [(1, 4), (6, 7)]),
    # Disabled entries, kept for reference:
    # (11, [(1, 30)]),
    # (12, [(1, 1)]),
]

# One entry per day, shaped "output_CMAQ_2020MMDD00" (file name without
# extension), in chronological order.
filename1 = [
    "output_CMAQ_2020{0:02d}{1:02d}00".format(month, day)
    for month, day_ranges in CMAQ_2020_DAY_RANGES
    for first_day, last_day in day_ranges
    for day in range(first_day, last_day + 1)
]

In [24]:
# Load list for this data set: an independent copy of filename1.
filename = list(filename1)

In [25]:
# Sanity check: number of entries queued in the load list.
len(filename)

279

In [26]:
# Load every file in the load list, then stack all of them with a single
# concat (re-concatenating inside the loop copies the accumulated rows on
# every iteration).
frames = [ExtractData(name) for name in filename]
csv_df = pd.concat(frames, ignore_index = True)
# The index is written as the first, unnamed column of the CSV.
csv_df.to_csv("/home/shuanjeng/下載/Data/ncu.csv")

In [27]:
# Read the combined CSV back from disk and display its (rows, columns) shape.
df = pd.read_csv("/home/shuanjeng/下載/Data/ncu.csv")
df.shape

(1374912, 7)

In [28]:
# Row-count audit: print "<position> <row count>" for every file that does
# not have exactly the expected number of rows, then print how many such
# files were found (kept in b).
expected_rows = 4928
b = 0
for idx, name in enumerate(filename):
    row_count = len(ExtractData(name))
    if row_count == expected_rows:
        continue
    b += 1
    print(idx, row_count)
print(b)

0
