In [None]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

import time
import os
from datetime import datetime

In [None]:
# Province folder names to scrape, plus the lookup that will hold each one's
# latitude/longitude once the position files have been parsed.
provinces = ['BKK', 'Chiangmai', 'Khonkaen', 'Rayong', 'Saraburi', 'Surat']
provincesLatLng = {}

In [None]:
# Read ./<province>/position.txt for every province and record its coordinates.
# The latitude is the third space-separated token of the first line and the
# longitude is the third token of the second line; both are kept as strings.
for province in provinces:
    # Use a context manager so the file handle is closed as soon as it is read.
    with open(f'./{province}/position.txt', 'r') as position_file:
        position_lines = position_file.read().split('\n')
    provincesLatLng[province] = {
        'lat': position_lines[0].split(' ')[2],
        'lng': position_lines[1].split(' ')[2],
    }
provincesLatLng

In [None]:
# Start the Chrome session used by every scrape below. ChromeDriverManager().install()
# supplies the chromedriver path that is handed to the Selenium service.
# The session is closed by driver.quit() later in the notebook.
service = ChromeService(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

In [None]:
# 2020/07/01 01:00(utc+7) – 2021/06/30 22:00 => 2020/06/30/1800Z (utc+0) - 2021/06/30/1500Z (utc+0)
def formatTime(dt):
    """Convert 'YYYY-MM-DD HH:MM[ ...]' into 'DD/MM/YYYY H:00:00'.

    Only the first two space-separated tokens of ``dt`` are used. The minutes
    are discarded and the hour is emitted without zero padding.
    """
    tokens = dt.split(' ')
    date_part, clock_part = tokens[0], tokens[1]
    year, month, day = date_part.split('-')
    hour_value = int(clock_part.split(':')[0])
    return f"{day}/{month}/{year} {hour_value}:00:00"

In [None]:
# Quick check of formatTime on a sample timestamp; the trailing "Local" token is ignored.
formatTime("2020-06-30 07:00 Local")

In [None]:
#use time in utc+0
def scrapeData(year,month,day,hour,province,timeout=9999):
    """Load the earth.nullschool.net so2smass overlay for one timestamp and return the spotlight reading.

    Args:
        year, month, day, hour: integer timestamp components (UTC). Month, day
            and hour are zero-padded to two digits when the URL is built.
        province: key of ``provincesLatLng``; its 'lat'/'lng' values are placed
            in the ``loc=`` part of the URL (longitude first).
        timeout: maximum number of seconds to wait for the spotlight panel to
            become visible and, separately, for the status element to stop
            reading "Downloading...". Defaults to the previous hard-coded 9999.

    Returns:
        str: the first space-separated token of the spotlight panel text.

    Raises:
        selenium.common.exceptions.TimeoutException: the spotlight panel did
            not become visible within ``timeout`` seconds.
        TimeoutError: the status element still read "Downloading..." after
            ``timeout`` seconds (previously this case looped forever).
    """
    spotlight_xpath = '//*[@id="spotlight-panel"]/div[3]/div'
    status_xpath = '/html/body/main/div[3]/div[1]/div'

    coords = provincesLatLng[province]
    lat, lng = coords['lat'], coords['lng']
    url = f'https://earth.nullschool.net/#{year}/{month:02d}/{day:02d}/{hour:02d}00Z/chem/surface/level/overlay=so2smass/equirectangular/loc={lng},{lat}'
    #go to web
    # NOTE(review): consecutive URLs differ only in the fragment; confirm the page
    # has started loading the new timestamp before the status is sampled below.
    driver.get(url=url)
    element = WebDriverWait(driver,timeout).until(EC.visibility_of_element_located((By.XPATH, spotlight_xpath)))

    # Poll the status element until the data download has finished, but give up
    # after `timeout` seconds instead of spinning indefinitely.
    deadline = time.monotonic() + timeout
    while driver.find_element(By.XPATH, status_xpath).text == "Downloading...":
        if time.monotonic() > deadline:
            raise TimeoutError(f'Data still downloading after {timeout} seconds: {url}')
        time.sleep(0.05)

    #so2
    data = element.text.split(' ')[0]
    print(data)
    return data

In [None]:
# Smoke test: fetch a single reading for 'BKK' at 2020-06-30 06:00 UTC.
scrapeData(2020,6,30,6,'BKK')

In [None]:
# Days per month, indexed by month number (index 0 is a placeholder so that
# nday[1] is January). February is fixed at 28, so leap years are not represented.
nday = [0,31,28,31,30,31,30,31,31,30,31,30,31]

In [None]:
# 2017/07/01 01:00(utc+7) – 2021/07/01 22:00(utc+7) => 2017/06/30/1800Z (utc+0) - 2021/07/01/1500Z (utc+0)
def getData(province):
    """Scrape the SO2 reading for ``province`` every 3 hours across the study window.

    The window runs from 2017-06-30 18:00 UTC through 2021-07-01 15:00 UTC,
    both ends inclusive. Timestamps are produced with datetime arithmetic
    rather than a fixed days-per-month table, so every calendar day is
    visited, including 29 February 2020.

    Args:
        province: key of ``provincesLatLng``; passed through to ``scrapeData``.

    Returns:
        dict: ``{'so2 (ug/m^3)': [...]}`` holding one ``scrapeData`` result per
        timestamp, in chronological order.
    """
    # Local import: timedelta is not imported at file level.
    from datetime import datetime, timedelta

    first_timestamp = datetime(2017, 6, 30, 18)
    last_timestamp = datetime(2021, 7, 1, 15)
    step = timedelta(hours=3)

    listData = []
    current = first_timestamp
    while current <= last_timestamp:
        listData.append(
            scrapeData(current.year, current.month, current.day, current.hour, province)
        )
        current += step

    return {'so2 (ug/m^3)': listData}

In [None]:
# Scratch check: scrape two 'BKK' readings for 2020-06-30 (18:00 and 06:00 UTC)
# and wrap them in a one-column dict keyed 'so2 (ppmv)'.
listData = [scrapeData(2020, 6, 30, sample_hour, 'BKK') for sample_hour in (18, 6)]
data_temp = listData[-1]
tempDict = {'so2 (ppmv)': listData}
tempDict

In [None]:
# Scrape the full series for 'BKK'. Long-running: one page load per 3-hour timestamp.
tempBKK = getData('BKK')

In [None]:
# Persist the 'BKK' readings as a single-column CSV (row index omitted).
dfBKK = pd.DataFrame(tempBKK)
dfBKK.to_csv('bkk_so2_surface.csv', index=False)

In [None]:
# Scrape the full series for 'Chiangmai'. Long-running: one page load per 3-hour timestamp.
tempChiangmai = getData('Chiangmai')

In [None]:
# Persist the 'Chiangmai' readings as a single-column CSV (row index omitted).
dfChiangmai = pd.DataFrame(tempChiangmai)
dfChiangmai.to_csv('chiangmai_so2_surface.csv', index=False)

In [None]:
# Scrape the full series for 'Khonkaen'. Long-running: one page load per 3-hour timestamp.
tempKhonkaen = getData('Khonkaen')

In [None]:
# Persist the 'Khonkaen' readings as a single-column CSV (row index omitted).
# Note the '_test' suffix, which the BKK and Chiangmai output files do not have.
dfKhonkaen = pd.DataFrame(tempKhonkaen)
dfKhonkaen.to_csv('khonkaen_so2_surface_test.csv', index=False)

In [None]:
# Scrape the full series for 'Rayong'. Long-running: one page load per 3-hour timestamp.
tempRayong = getData('Rayong')

In [None]:
# Persist the 'Rayong' readings as a single-column CSV (row index omitted).
# Note the '_test' suffix, which the BKK and Chiangmai output files do not have.
dfRayong = pd.DataFrame(tempRayong)
dfRayong.to_csv('rayong_so2_surface_test.csv', index=False)

In [None]:
# Scrape the full series for 'Saraburi'. Long-running: one page load per 3-hour timestamp.
tempSaraburi = getData('Saraburi')

In [None]:
# Persist the 'Saraburi' readings as a single-column CSV (row index omitted).
# Note the '_test' suffix, which the BKK and Chiangmai output files do not have.
dfSaraburi = pd.DataFrame(tempSaraburi)
dfSaraburi.to_csv('saraburi_so2_surface_test.csv', index=False)

In [None]:
# Re-scrapes 2021/06/22/1500Z - 2021/07/01/1500Z (utc+0), i.e. 2021/06/22 22:00 - 2021/07/01 22:00 (utc+7)
def scrapeAdditionalDay(province):
    """Append freshly scraped late-June / 1-July 2021 readings to a province's saved CSV.

    Reads ``./{province}_so2_surface.csv``, then scrapes every 3-hour timestamp
    from 2021-06-22 15:00 UTC through 2021-07-01 15:00 UTC (inclusive) and
    returns the saved rows followed by the new ones. The original row labels
    of both frames are kept, so index values repeat in the result.

    NOTE(review): the new rows are stored under 'so2 (ppmv)'. If the saved CSV
    uses a different column header, the concatenation places old and new
    values in separate columns - confirm the header of the input file.

    Args:
        province: key of ``provincesLatLng`` and prefix of the CSV file name.

    Returns:
        pandas.DataFrame: saved rows followed by the newly scraped rows.
    """
    saved_rows = pd.read_csv(f'./{province}_so2_surface.csv')

    # Build the timestamp list first: the tail of 22 June, eight full days
    # (23-30 June), then 1 July up to and including 15:00.
    timestamps = [(2021, 6, 22, hour) for hour in (15, 18, 21)]
    timestamps += [(2021, 6, day, hour) for day in range(23, 31) for hour in range(0, 24, 3)]
    timestamps += [(2021, 7, 1, hour) for hour in range(0, 16, 3)]

    readings = [
        scrapeData(year, month, day, hour, province)
        for year, month, day, hour in timestamps
    ]
    new_rows = pd.DataFrame({'so2 (ppmv)': readings})
    return pd.concat([saved_rows, new_rows])

In [None]:
# Extend the saved Saraburi series with the additional readings and write the combined table.
# NOTE(review): scrapeAdditionalDay reads './Saraburi_so2_surface.csv', while the earlier cell
# writes 'saraburi_so2_surface_test.csv' - confirm the input file exists under that name.
dfSaraburiNew = scrapeAdditionalDay('Saraburi')
dfSaraburiNew.to_csv('Saraburi_so2_surface2.csv', index=False)

In [None]:
# Scrape the full series for 'Surat'. Long-running: one page load per 3-hour timestamp.
tempSurat = getData('Surat')

In [None]:
# Persist the 'Surat' readings as a single-column CSV (row index omitted).
# Note the '_test' suffix, which the BKK and Chiangmai output files do not have.
dfSurat = pd.DataFrame(tempSurat)
dfSurat.to_csv('surat_so2_surface_test.csv', index=False)

In [None]:
# Shut down the browser and end the WebDriver session once all scraping is done.
driver.quit()