In [1]:
from joblib import Parallel, delayed
import time
import os
import xml.etree.ElementTree as ET
import re
from datetime import datetime,timedelta
import pytz
import csv
import multiprocessing
import os
from multiprocessing.pool import ThreadPool as Pool
multiprocessing.set_start_method("fork", force=True)
import matplotlib.pyplot as plt

In [2]:
path = "data_info_main_par.csv"
with open(path,'w') as f:
    csv_write = csv.writer(f)
    csv_head = ["Time","Temperature","Relative Humidity"]
    # csv_head = np.array(csv_head)
    # str.encode(csv_head,'utf-8')
    csv_write.writerow(csv_head)

In [3]:
path = "data_info_area_par.csv"
with open(path,'w') as f:
    csv_write = csv.writer(f)
    csv_head = ["Time","Area","Temperature"]
    csv_write.writerow(csv_head)

In [4]:
path = "data_info_rain_par.csv"
with open(path,'w') as f:
    csv_write = csv.writer(f)
    csv_head = ["Time","area","rain"]
    csv_write.writerow(csv_head)

In [5]:
path = "data_info_reminder_par.csv"
with open(path,'w') as f:
    csv_write = csv.writer(f)
    csv_head = ["Time","reminder"]
    csv_write.writerow(csv_head)

In [6]:
path = "data_info_uv_par.csv"
with open(path,'w') as f:
    csv_write = csv.writer(f)
    csv_head = ["Time","UV","Intensity"]
    csv_write.writerow(csv_head)

In [7]:
def convert_to_beijing_time(gmt_time_str):
    
    gmt_timezone = pytz.timezone('GMT')

    gmt_time = datetime.strptime(gmt_time_str, '%a, %d %b %Y %H:%M:%S %Z').replace(tzinfo=gmt_timezone)
    
    beijing_timezone = pytz.timezone('Asia/Shanghai')
    
    beijing_time = gmt_time.astimezone(beijing_timezone)

    return beijing_time.strftime('%Y-%m-%d %H:%M:%S')

In [8]:
def write_xml_main(file_path, xml_path, file_name):

    tree = ET.parse(file_path)
    root = tree.getroot()
    
    data_row = []
    
    # Open the new file in write mode
    with open(file_name + '.txt', 'w') as f:
        for item in root.iter('item'):
            # Extract and write the time
            time = item.find('pubDate').text
            time = convert_to_beijing_time(time)
            f.write(f'Time: {time}\n')
            data_row.append(time)
    
            # Extract and write the air temperature and relative humidity
            description = item.find('description').text
            air_temp_match = re.search('Air temperature : (.*) degrees Celsius', description)
            rel_humidity_match = re.search('Relative Humidity : (.*) per cent', description)
            uv_match = re.search('the mean UV Index recorded at King\'s Park : (.*)<br/>', description)
            remind_match = re.search('reminded that:<br/>(.*)<br/></SPAN><br/>', description)
            air_temp = air_temp_match.group(1) if air_temp_match else 'NONE'
            rel_humidity = rel_humidity_match.group(1) if rel_humidity_match else 'NONE'
            uv = uv_match.group(1) if uv_match else 'NONE'
            remind = remind_match.group(1) if remind_match else 'NONE'
            f.write(f'Air Temperature: {air_temp} degrees Celsius\n')
            f.write(f'Relative Humidity: {rel_humidity} per cent\n')
            f.write(f'the mean UV Index recorded at King\'s Park : {uv}\n')
            f.write(f'remind that: {remind}\n')
            data_row.append(air_temp)
            data_row.append(rel_humidity)
            print(data_row)
    
            # Extract and write the air temperatures at other places
            f.write('Air Temperatures at Other Places:\n')
            for match in re.finditer('<tr><td><font size="-1">(.*?)</font></td><td width="100" align="right"><font size="-1">(.*?) ;</font></td></tr>', description):
                place_temp = match.group(1) + ' ' + match.group(2)
                f.write(f'{place_temp}\n')
    
            # Extract and write the rainfall recorded in various regions
            f.write('Rainfall Recorded in Various Regions:\n')
            for match in re.finditer('<tr><td>(.*?)</td><td width="100" align="right">(.*?)&nbsp;mm.', description):
                rainfall = match.group(1) + ' ' + match.group(2) + ' mm' 
                f.write(f'{rainfall}\n')
    
    # write the xml file
    with open(xml_path, 'a+') as f:
        csv_write = csv.writer(f)
        csv_write.writerow(data_row)

In [9]:
def write_xml_area(file_path, xml_path):

    tree = ET.parse(file_path)
    root = tree.getroot()
    
    # Open the new file in write mode
    for item in root.iter('item'):
        time = item.find('pubDate').text
        time = convert_to_beijing_time(time)
        
    
        description = item.find('description').text
        
        # Extract and write the air temperatures at other places
        for match in re.finditer('<tr><td><font size="-1">(.*?)</font></td><td width="100" align="right"><font size="-1">(.*?) degrees ', description):
            data_row = []
            data_row.append(time)
            place = match.group(1)
            place_temp = match.group(2)
            data_row.append(place)
            data_row.append(place_temp)
            print(data_row)
            with open(xml_path, 'a+') as f:
                csv_write = csv.writer(f)
                csv_write.writerow(data_row)

In [10]:
def write_xml_rain(file_path, xml_path):

    tree = ET.parse(file_path)
    root = tree.getroot()
    
    
    # Open the new file in write mode
    for item in root.iter('item'):
        time = item.find('pubDate').text
        time = convert_to_beijing_time(time)
        
    
        description = item.find('description').text
        
            
        for match in re.finditer('<tr><td>(.*?)</td><td width="100" align="right">(.*?)&nbsp;mm.', description):
            data_row = []
            data_row.append(time)
            rain_place = match.group(1)
            rainfall = match.group(2)
            data_row.append(rain_place)
            data_row.append(rainfall)
            print(data_row)
            if data_row.__len__() != 1:
                with open(xml_path, 'a+') as f:
                    csv_write = csv.writer(f)
                    csv_write.writerow(data_row)

In [11]:
def write_xml_uv(file_path, xml_path):

    tree = ET.parse(file_path)
    root = tree.getroot()
    
    data_row = []
    
    # Open the new file in write mode
    for item in root.iter('item'):
        time = item.find('pubDate').text
        time = convert_to_beijing_time(time)
        data_row.append(time)
    
        description = item.find('description').text
        
        # Extract and write the air temperatures at other places
        uv_match = re.search('the mean UV Index recorded at King\'s Park : (.*)<br/>\n', description)
        uv = uv_match.group(1) if uv_match else 'NONE'
        if uv != 'NONE':
            data_row.append(uv)
            Intensity = re.search('Intensity of UV radiation : (.*)<br/>', description).group(1)
            if Intensity.__len__() > 20:
                Intensity = Intensity.split('<br/>',1)[0]
                data_row.append(Intensity)
            else:
                data_row.append(Intensity)
            print(data_row)
            
    # write the xml file
    if data_row.__len__() != 1:
        with open(xml_path, 'a+') as f:
            csv_write = csv.writer(f)
            csv_write.writerow(data_row)

In [12]:
path1 = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2018'
path2 = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2019'
path3 = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2020'
path4 = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2021'
path5 = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2022'
path6 = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2023'

path_list=[]
path_list = os.listdir(path1)
path_list.extend(os.listdir(path2))
path_list.extend(os.listdir(path3))
path_list.extend(os.listdir(path4))
path_list.extend(os.listdir(path5))
path_list.extend(os.listdir(path6))

#path_list.remove('.DS_Store')

path_list.sort()

print(path_list)

['20181207-0830.xml', '20181207-0915.xml', '20181207-1015.xml', '20181207-1115.xml', '20181207-1215.xml', '20181207-1315.xml', '20181207-1415.xml', '20181207-1515.xml', '20181207-1615.xml', '20181207-1715.xml', '20181207-1815.xml', '20181207-1915.xml', '20181207-2015.xml', '20181207-2115.xml', '20181207-2215.xml', '20181207-2315.xml', '20181208-0015.xml', '20181208-0115.xml', '20181208-0215.xml', '20181208-0315.xml', '20181208-0415.xml', '20181208-0515.xml', '20181208-0615.xml', '20181208-0715.xml', '20181208-0815.xml', '20181208-0915.xml', '20181208-1000.xml', '20181208-1015.xml', '20181208-1115.xml', '20181208-1215.xml', '20181208-1315.xml', '20181208-1415.xml', '20181208-1515.xml', '20181208-1615.xml', '20181208-1715.xml', '20181208-1815.xml', '20181208-1915.xml', '20181208-2015.xml', '20181208-2115.xml', '20181208-2215.xml', '20181208-2315.xml', '20181209-0015.xml', '20181209-0100.xml', '20181209-0115.xml', '20181209-0215.xml', '20181209-0315.xml', '20181209-0415.xml', '20181209-05

In [47]:
path_list2=[]
path_list2 = os.listdir(path2)
path_list2.sort()
print(path_list2)

['20190101-0000.xml', '20190101-0015.xml', '20190101-0115.xml', '20190101-0215.xml', '20190101-0315.xml', '20190101-0415.xml', '20190101-0515.xml', '20190101-0615.xml', '20190101-0715.xml', '20190101-0815.xml', '20190705-0845.xml', '20190705-0900.xml', '20190705-0915.xml', '20190705-0930.xml', '20190705-0945.xml', '20190705-1000.xml', '20190705-1015.xml', '20190705-1030.xml', '20190705-1115.xml', '20190705-1130.xml', '20190705-1200.xml', '20190705-1215.xml', '20190705-1315.xml', '20190705-1330.xml', '20190705-1415.xml', '20190705-1430.xml', '20190705-1515.xml', '20190705-1530.xml', '20190705-1545.xml', '20190705-1600.xml', '20190705-1615.xml', '20190705-1630.xml', '20190705-1645.xml', '20190705-1700.xml', '20190705-1715.xml', '20190705-1815.xml', '20190705-1845.xml', '20190705-1900.xml', '20190705-1915.xml', '20190705-2015.xml', '20190705-2115.xml', '20190705-2145.xml', '20190705-2215.xml', '20190705-2230.xml', '20190705-2245.xml', '20190705-2315.xml', '20190706-0000.xml', '20190706-00

In [19]:
xml_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_info_main_par.csv'

In [42]:
def xml_main_parr(f):
    file_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2023/' + f
    file_name = f[0:13]
    final_file_name = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_txt_new/' + file_name
    write_xml_main(file_path, xml_path, final_file_name)  

In [43]:
Parallel(n_jobs=-1)(delayed(xml_main_parr)(f) for f in path_list6)

['2022-03-12 12:02:00', '25', '55']
['2022-03-12 13:02:00', '25', '53']
['2022-03-12 14:02:00', '26', '50']
['2022-03-12 15:02:00', '25', '50']
['2022-03-12 16:02:00', '25', '50']
['2022-03-12 17:02:00', '24', '53']
['2022-03-12 18:02:00', '23', '51']
['2022-03-12 19:02:00', '22', '57']
['2022-03-12 20:02:00', '22', '58']
['2022-03-12 21:46:00', '22', '61']
['2022-03-12 22:02:00', '22', '63']
['2022-03-12 23:02:00', '22', '65']
['2022-03-13 00:02:00', '22', '68']
['2022-04-06 01:02:00', '21', '75']
['2022-04-06 02:02:00', '20', '78']
['2022-04-06 03:02:00', '20', '79']
['2022-04-06 04:02:00', '20', '81']
['2022-04-06 05:02:00', '20', '85']
['2022-04-06 06:02:00', '19', '85']
['2022-04-06 07:02:00', '20', '85']
['2022-04-06 08:02:00', '21', '82']
['2022-04-06 09:02:00', '22', '77']
['2022-04-06 10:02:00', '23', '71']
['2022-04-06 11:02:00', '25', '63']
['2022-04-06 12:02:00', '25', '63']
['2022-04-06 13:02:00', '26', '56']
['2022-04-06 14:02:00', '26', '51']
['2022-04-06 15:02:00', '26'

['2022-04-17 11:02:00', '22', '69']
['2022-04-17 12:02:00', '23', '63']
['2022-04-17 13:02:00', '24', '61']
['2022-04-17 14:02:00', '23', '59']
['2022-04-17 15:02:00', '22', '62']
['2022-04-17 16:02:00', '22', '71']
['2022-04-17 17:02:00', '22', '71']
['2022-04-17 18:02:00', '22', '73']
['2022-04-17 19:02:00', '21', '73']
['2022-04-17 20:02:00', '21', '75']
['2022-04-17 21:02:00', '22', '75']
['2022-04-17 22:02:00', '21', '75']
['2022-04-17 23:02:00', '21', '75']
['2022-04-18 00:02:00', '22', '78']
['2022-04-18 01:02:00', '21', '78']
['2022-04-18 02:02:00', '21', '78']
['2022-04-18 03:02:00', '21', '79']
['2022-04-18 04:02:00', '21', '79']
['2022-04-18 05:02:00', '21', '79']
['2022-04-18 06:02:00', '21', '79']
['2022-04-18 07:02:00', '21', '79']
['2022-04-18 08:02:00', '21', '78']
['2022-04-18 09:02:00', '21', '74']
['2022-04-18 10:02:00', '22', '76']
['2022-04-18 11:02:00', '22', '71']
['2022-04-18 12:02:00', '23', '69']
['2022-04-18 13:02:00', '23', '72']
['2022-04-18 14:02:00', '23'

['2022-07-07 01:02:00', '28', '87']
['2022-07-07 02:02:00', '28', '87']
['2022-07-07 03:02:00', '28', '86']
['2022-07-07 04:21:00', '28', '86']
['2022-07-07 05:02:00', '28', '88']
['2022-07-07 06:16:00', '27', '94']
['2022-07-07 07:16:00', '27', '91']
['2022-07-07 08:02:00', '28', '89']
['2022-07-07 09:02:00', '28', '88']
['2022-07-07 10:02:00', '29', '86']
['2022-07-07 11:02:00', '30', '82']
['2022-07-07 12:27:00', '31', '79']
['2022-07-07 13:02:00', '31', '75']
['2022-07-07 14:11:00', '27', '91']
['2022-07-07 15:16:00', '29', '86']
['2022-07-07 16:48:00', '29', '83']
['2022-07-07 17:02:00', '29', '85']
['2022-07-07 18:02:00', '29', '87']
['2022-07-07 19:02:00', '29', '85']
['2022-07-07 20:02:00', '29', '85']
['2022-07-07 21:02:00', '29', '86']
['2022-07-07 22:02:00', '29', '84']
['2022-07-07 23:02:00', '29', '87']
['2022-07-08 00:02:00', '29', '88']
['2022-07-08 01:02:00', '28', '89']
['2022-07-08 02:02:00', '28', '89']
['2022-07-08 03:02:00', '28', '91']
['2022-07-08 04:02:00', '28'

['2022-04-30 15:02:00', '25', '83']
['2022-04-30 16:02:00', '25', '83']
['2022-04-30 17:02:00', '25', '82']
['2022-04-30 18:02:00', '25', '82']
['2022-04-30 19:02:00', '25', '82']
['2022-04-30 20:02:00', '25', '81']
['2022-04-30 21:02:00', '25', '83']
['2022-04-30 22:02:00', '25', '84']
['2022-04-30 23:02:00', '24', '84']
['2022-05-01 00:02:00', '24', '85']
['2022-05-01 01:02:00', '24', '87']
['2022-05-01 02:02:00', '24', '85']
['2022-05-01 03:02:00', '24', '84']
['2022-05-01 04:02:00', '24', '83']
['2022-05-01 05:02:00', '24', '84']
['2022-05-01 06:02:00', '23', '86']
['2022-05-01 07:02:00', '23', '84']
['2022-05-01 08:02:00', '22', '86']
['2022-05-01 09:02:00', '22', '87']
['2022-05-01 10:02:00', '21', '89']
['2022-05-01 11:02:00', '21', '92']
['2022-05-01 12:02:00', '21', '93']
['2022-05-01 13:02:00', '20', '92']
['2022-05-01 14:02:00', '20', '91']
['2022-05-01 15:02:00', '19', '91']
['2022-05-01 16:02:00', '19', '90']
['2022-05-01 17:46:00', '19', '84']
['2022-05-01 18:02:00', '18'

['2022-10-21 06:02:00', '23', '74']
['2022-10-21 07:02:00', '23', '70']
['2022-10-21 08:02:00', '24', '68']
['2022-10-21 09:02:00', '24', '67']
['2022-10-21 10:02:00', '25', '61']
['2022-10-21 11:02:00', '27', '59']
['2022-10-21 12:02:00', '27', '54']
['2022-10-21 13:02:00', '28', '56']
['2022-10-21 14:02:00', '28', '54']
['2022-10-21 15:02:00', '28', '58']
['2022-10-21 16:02:00', '28', '61']
['2022-10-21 17:02:00', '26', '65']
['2022-10-21 18:02:00', '25', '67']
['2022-10-21 19:02:00', '25', '68']
['2022-10-21 20:02:00', '25', '73']
['2022-10-21 21:02:00', '25', '76']
['2022-10-21 22:02:00', '25', '77']
['2022-10-21 23:02:00', '24', '79']
['2022-10-22 00:02:00', '24', '79']
['2022-10-22 01:02:00', '24', '80']
['2022-10-22 02:02:00', '24', '79']
['2022-10-22 03:02:00', '24', '82']
['2022-10-22 04:02:00', '24', '83']
['2022-10-22 05:02:00', '23', '84']
['2022-10-22 06:02:00', '23', '85']
['2022-10-22 07:02:00', '23', '85']
['2022-10-22 08:02:00', '23', '83']
['2022-10-22 09:02:00', '25'

['2022-11-19 01:02:00', '24', '78']
['2022-11-19 02:02:00', '24', '79']
['2022-11-19 03:02:00', '24', '79']
['2022-11-21 10:02:00', '24', '74']
['2022-11-21 11:02:00', '25', '71']
['2022-11-21 12:02:00', '25', '69']
['2022-11-21 13:02:00', '25', '71']
['2022-11-21 14:02:00', '25', '74']
['2022-11-21 15:02:00', '25', '74']
['2022-11-21 16:02:00', '25', '79']
['2022-11-21 17:02:00', '23', '89']
['2022-11-21 18:02:00', '23', '83']
['2022-11-23 07:02:00', '23', '92']
['2022-11-23 08:02:00', '23', '93']
['2022-11-23 09:02:00', '24', '92']
['2022-11-23 10:02:00', '24', '91']
['2022-11-23 11:02:00', '25', '89']
['2022-11-23 12:02:00', '25', '88']
['2022-11-23 13:02:00', '25', '88']
['2022-11-23 14:02:00', '24', '88']
['2022-11-23 15:02:00', '24', '91']
['2022-11-25 22:02:00', '22', '95']
['2022-11-25 23:02:00', '22', '94']
['2022-11-26 00:02:00', '22', '95']
['2022-11-26 01:02:00', '22', '95']
['2022-11-26 02:02:00', '22', '95']
['2022-11-26 03:02:00', '22', '92']
['2022-11-26 04:02:00', '22'

['2022-12-28 05:02:00', '15', '81']
['2022-12-28 06:02:00', '15', '79']
['2022-12-28 07:02:00', '15', '84']
['2022-12-28 08:02:00', '15', '78']
['2022-12-28 09:02:00', '16', '74']
['2022-12-28 10:02:00', '17', '71']
['2022-12-28 11:02:00', '19', '56']
['2022-12-28 12:02:00', '19', '56']
['2022-12-28 13:02:00', '20', '58']
['2022-12-30 12:02:00', '16', '59']
['2022-12-30 13:02:00', '17', '57']
['2022-12-30 14:02:00', '17', '56']
['2022-12-30 15:02:00', '17', '52']
['2022-12-30 16:02:00', '17', '54']
['2022-12-30 17:02:00', '17', '58']
['2022-12-30 18:02:00', '16', '58']
['2022-12-30 19:02:00', '16', '56']
['2022-12-30 20:02:00', '15', '58']
['2023-01-01 02:02:00', '15', '65']
['2023-01-01 05:02:00', '15', '69']
['2023-01-01 09:02:00', '15', '66']
['2023-01-01 12:02:00', '18', '61']
['2023-01-01 14:02:00', '18', '60']
['2023-01-01 20:02:00', '18', '62']
['2023-01-01 22:31:00', '18', '66']
['2023-01-02 23:31:00', '18', '66']
['2023-01-03 00:02:00', '18', '64']
['2023-01-03 05:02:00', '17'

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [56]:
def xml_area_parr(f):
    xml_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_info_area_par.csv'
    file_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2023/' + f
    file_name = f[0:13]
    write_xml_area(file_path, xml_path)

In [57]:
Parallel(n_jobs=-1)(delayed(xml_area_parr)(f) for f in path_list6)

['2022-12-22 13:02:00', 'Ta Kwu Ling', '21']
['2022-12-22 13:02:00', 'Lau Fau Shan', '22']
['2022-12-22 13:02:00', 'Tai Po', '21']
['2022-12-22 13:02:00', 'Sha Tin', '20']
['2022-12-22 13:02:00', 'Tuen Mun', '21']
['2022-12-22 13:02:00', 'Tseung Kwan O', '22']
['2022-12-22 13:02:00', 'Sai Kung', '19']
['2022-12-22 13:02:00', 'Cheung Chau', '22']
['2022-12-22 13:02:00', 'Chek Lap Kok', '19']
['2022-12-22 13:02:00', 'Tsing Yi', '21']
['2022-12-22 13:02:00', 'Shek Kong', '21']
['2022-12-22 13:02:00', 'Tsuen Wan Ho Koon', '19']
['2022-12-22 13:02:00', 'Tsuen Wan Shing Mun Valley', '21']
['2022-12-22 13:02:00', 'Hong Kong Park', '22']
['2022-12-22 13:02:00', 'Shau Kei Wan', '19']
['2022-12-22 13:02:00', 'Kowloon City', '20']
['2022-12-22 13:02:00', 'Happy Valley', '20']
['2022-12-22 13:02:00', 'Wong Tai Sin', '21']
['2022-12-22 13:02:00', 'Stanley', '19']
['2022-12-22 13:02:00', 'Kwun Tong', '19']
['2022-12-22 13:02:00', 'Sham Shui Po', '21']
['2022-12-22 13:02:00', 'Kai Tak Runway Park', '

['2023-01-04 07:02:00', 'Tsing Yi', '16']
['2023-01-04 07:02:00', 'Shek Kong', '14']
['2023-01-04 07:02:00', 'Tsuen Wan Ho Koon', '13']
['2023-01-04 07:02:00', 'Tsuen Wan Shing Mun Valley', '14']
['2023-01-04 07:02:00', 'Hong Kong Park', '16']
['2023-01-04 07:02:00', 'Shau Kei Wan', '15']
['2023-01-04 07:02:00', 'Happy Valley', '17']
['2023-01-04 07:02:00', 'Wong Tai Sin', '16']
['2023-01-04 07:02:00', 'Stanley', '16']
['2023-01-04 07:02:00', 'Kwun Tong', '15']
['2023-01-04 07:02:00', 'Sham Shui Po', '15']
['2023-01-04 07:02:00', 'Kai Tak Runway Park', '16']
['2023-01-04 07:02:00', 'Yuen Long Park', '15']
['2023-01-04 07:02:00', 'Tai Mei Tuk', '15']
['2023-01-04 08:02:00', 'Hong Kong Observatory', '16']
['2023-01-04 08:02:00', "King's Park", '15']
['2023-01-04 08:02:00', 'Wong Chuk Hang', '16']
['2023-01-04 08:02:00', 'Ta Kwu Ling', '14']
['2023-01-04 08:02:00', 'Lau Fau Shan', '15']
['2023-01-04 08:02:00', 'Tai Po', '15']
['2023-01-04 08:02:00', 'Sha Tin', '15']
['2023-01-04 08:02:00'

['2023-01-13 07:02:00', 'Hong Kong Observatory', '20']
['2023-01-13 07:02:00', "King's Park", '19']
['2023-01-13 07:02:00', 'Wong Chuk Hang', '21']
['2023-01-13 07:02:00', 'Ta Kwu Ling', '20']
['2023-01-13 07:02:00', 'Lau Fau Shan', '20']
['2023-01-13 07:02:00', 'Tai Po', '20']
['2023-01-13 07:02:00', 'Sha Tin', '21']
['2023-01-13 07:02:00', 'Tuen Mun', '21']
['2023-01-13 07:02:00', 'Tseung Kwan O', '19']
['2023-01-13 07:02:00', 'Sai Kung', '19']
['2023-01-13 07:02:00', 'Cheung Chau', '20']
['2023-01-13 07:02:00', 'Chek Lap Kok', '21']
['2023-01-13 07:02:00', 'Tsing Yi', '21']
['2023-01-13 07:02:00', 'Shek Kong', '21']
['2023-01-13 07:02:00', 'Tsuen Wan Ho Koon', '20']
['2023-01-13 07:02:00', 'Tsuen Wan Shing Mun Valley', '21']
['2023-01-13 07:02:00', 'Hong Kong Park', '19']
['2023-01-13 07:02:00', 'Shau Kei Wan', '19']
['2023-01-13 07:02:00', 'Kowloon City', '19']
['2023-01-13 07:02:00', 'Happy Valley', '20']
['2023-01-13 07:02:00', 'Wong Tai Sin', '20']
['2023-01-13 07:02:00', 'Stanl

['2023-01-30 08:46:00', 'Hong Kong Park', '11']
['2023-01-30 08:46:00', 'Shau Kei Wan', '13']
['2023-01-30 08:46:00', 'Kowloon City', '12']
['2023-01-30 08:46:00', 'Happy Valley', '9']
['2023-01-30 08:46:00', 'Wong Tai Sin', '11']
['2023-01-30 08:46:00', 'Stanley', '12']
['2023-01-30 08:46:00', 'Kwun Tong', '11']
['2023-01-30 08:46:00', 'Sham Shui Po', '12']
['2023-01-30 08:46:00', 'Kai Tak Runway Park', '13']
['2023-01-30 08:46:00', 'Yuen Long Park', '6']
['2023-01-30 08:46:00', 'Tai Mei Tuk', '10']
['2023-01-30 09:46:00', 'Hong Kong Observatory', '13']
['2023-01-30 09:46:00', "King's Park", '14']
['2023-01-30 09:46:00', 'Wong Chuk Hang', '13']
['2023-01-30 09:46:00', 'Ta Kwu Ling', '8']
['2023-01-30 09:46:00', 'Lau Fau Shan', '12']
['2023-01-30 09:46:00', 'Tai Po', '12']
['2023-01-30 09:46:00', 'Sha Tin', '13']
['2023-01-30 09:46:00', 'Tuen Mun', '13']
['2023-01-30 09:46:00', 'Tseung Kwan O', '15']
['2023-01-30 09:46:00', 'Sai Kung', '15']
['2023-01-30 09:46:00', 'Cheung Chau', '12']

['2023-02-08 19:02:00', 'Wong Tai Sin', '19']
['2023-02-08 19:02:00', 'Stanley', '18']
['2023-02-08 19:02:00', 'Kwun Tong', '18']
['2023-02-08 19:02:00', 'Sham Shui Po', '19']
['2023-02-08 19:02:00', 'Kai Tak Runway Park', '18']
['2023-02-08 19:02:00', 'Yuen Long Park', '20']
['2023-02-08 19:02:00', 'Tai Mei Tuk', '18']
['2023-02-08 20:02:00', 'Hong Kong Observatory', '19']
['2023-02-08 20:02:00', "King's Park", '18']
['2023-02-08 20:02:00', 'Wong Chuk Hang', '19']
['2023-02-08 20:02:00', 'Ta Kwu Ling', '18']
['2023-02-08 20:02:00', 'Lau Fau Shan', '19']
['2023-02-08 20:02:00', 'Tai Po', '19']
['2023-02-08 20:02:00', 'Sha Tin', '18']
['2023-02-08 20:02:00', 'Tuen Mun', '19']
['2023-02-08 20:02:00', 'Tseung Kwan O', '18']
['2023-02-08 20:02:00', 'Sai Kung', '18']
['2023-02-08 20:02:00', 'Cheung Chau', '18']
['2023-02-08 20:02:00', 'Chek Lap Kok', '19']
['2023-02-08 20:02:00', 'Tsing Yi', '19']
['2023-02-08 20:02:00', 'Shek Kong', '19']
['2023-02-08 20:02:00', 'Tsuen Wan Ho Koon', '18']


['2023-02-22 14:02:00', 'Yuen Long Park', '22']
['2023-02-22 14:02:00', 'Tai Mei Tuk', '21']
['2023-02-22 15:02:00', 'Hong Kong Observatory', '20']
['2023-02-22 15:02:00', "King's Park", '19']
['2023-02-22 15:02:00', 'Wong Chuk Hang', '19']
['2023-02-22 15:02:00', 'Ta Kwu Ling', '22']
['2023-02-22 15:02:00', 'Lau Fau Shan', '19']
['2023-02-22 15:02:00', 'Tai Po', '20']
['2023-02-22 15:02:00', 'Sha Tin', '20']
['2023-02-22 15:02:00', 'Tuen Mun', '20']
['2023-02-22 15:02:00', 'Tseung Kwan O', '19']
['2023-02-22 15:02:00', 'Sai Kung', '17']
['2023-02-22 15:02:00', 'Cheung Chau', '18']
['2023-02-22 15:02:00', 'Chek Lap Kok', '19']
['2023-02-22 15:02:00', 'Tsing Yi', '19']
['2023-02-22 15:02:00', 'Shek Kong', '23']
['2023-02-22 15:02:00', 'Tsuen Wan Ho Koon', '20']
['2023-02-22 15:02:00', 'Tsuen Wan Shing Mun Valley', '22']
['2023-02-22 15:02:00', 'Hong Kong Park', '19']
['2023-02-22 15:02:00', 'Shau Kei Wan', '17']
['2023-02-22 15:02:00', 'Kowloon City', '21']
['2023-02-22 15:02:00', 'Happ

['2023-02-26 20:02:00', 'Cheung Chau', '15']
['2023-02-26 20:02:00', 'Chek Lap Kok', '17']
['2023-02-26 20:02:00', 'Tsing Yi', '17']
['2023-02-26 20:02:00', 'Shek Kong', '16']
['2023-02-26 20:02:00', 'Tsuen Wan Ho Koon', '14']
['2023-02-26 20:02:00', 'Tsuen Wan Shing Mun Valley', '15']
['2023-02-26 20:02:00', 'Hong Kong Park', '16']
['2023-02-26 20:02:00', 'Shau Kei Wan', '16']
['2023-02-26 20:02:00', 'Kowloon City', '15']
['2023-02-26 20:02:00', 'Happy Valley', '17']
['2023-02-26 20:02:00', 'Wong Tai Sin', '16']
['2023-02-26 20:02:00', 'Stanley', '16']
['2023-02-26 20:02:00', 'Kwun Tong', '15']
['2023-02-26 20:02:00', 'Sham Shui Po', '16']
['2023-02-26 20:02:00', 'Kai Tak Runway Park', '17']
['2023-02-26 20:02:00', 'Yuen Long Park', '15']
['2023-02-26 20:02:00', 'Tai Mei Tuk', '15']
['2023-02-26 21:02:00', 'Hong Kong Observatory', '17']
['2023-02-26 21:02:00', "King's Park", '16']
['2023-02-26 21:02:00', 'Wong Chuk Hang', '16']
['2023-02-26 21:02:00', 'Ta Kwu Ling', '15']
['2023-02-26

['2023-03-14 16:02:00', 'Yuen Long Park', '21']
['2023-03-14 16:02:00', 'Tai Mei Tuk', '21']
['2023-03-14 17:02:00', 'Hong Kong Observatory', '20']
['2023-03-14 17:02:00', "King's Park", '19']
['2023-03-14 17:02:00', 'Wong Chuk Hang', '21']
['2023-03-14 17:02:00', 'Ta Kwu Ling', '21']
['2023-03-14 17:02:00', 'Lau Fau Shan', '21']
['2023-03-14 17:02:00', 'Tai Po', '20']
['2023-03-14 17:02:00', 'Sha Tin', '20']
['2023-03-14 17:02:00', 'Tuen Mun', '20']
['2023-03-14 17:02:00', 'Tseung Kwan O', '20']
['2023-03-14 17:02:00', 'Sai Kung', '20']
['2023-03-14 17:02:00', 'Cheung Chau', '19']
['2023-03-14 17:02:00', 'Chek Lap Kok', '21']
['2023-03-14 17:02:00', 'Tsing Yi', '21']
['2023-03-14 17:02:00', 'Shek Kong', '21']
['2023-03-14 17:02:00', 'Tsuen Wan Ho Koon', '19']
['2023-03-14 17:02:00', 'Tsuen Wan Shing Mun Valley', '20']
['2023-03-14 17:02:00', 'Hong Kong Park', '20']
['2023-03-14 17:02:00', 'Shau Kei Wan', '19']
['2023-03-14 17:02:00', 'Kowloon City', '20']
['2023-03-14 17:02:00', 'Happ

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



['2023-07-08 05:02:00', 'Hong Kong Observatory', '29']
['2023-07-08 05:02:00', "King's Park", '28']
['2023-07-08 05:02:00', 'Wong Chuk Hang', '28']
['2023-07-08 05:02:00', 'Ta Kwu Ling', '25']
['2023-07-08 05:02:00', 'Lau Fau Shan', '28']
['2023-07-08 05:02:00', 'Tai Po', '29']
['2023-07-08 05:02:00', 'Sha Tin', '29']
['2023-07-08 05:02:00', 'Tuen Mun', '28']
['2023-07-08 05:02:00', 'Tseung Kwan O', '28']
['2023-07-08 05:02:00', 'Sai Kung', '28']
['2023-07-08 05:02:00', 'Cheung Chau', '27']
['2023-07-08 05:02:00', 'Chek Lap Kok', '29']
['2023-07-08 05:02:00', 'Tsing Yi', '28']
['2023-07-08 05:02:00', 'Tsuen Wan Ho Koon', '27']
['2023-07-08 05:02:00', 'Tsuen Wan Shing Mun Valley', '26']
['2023-07-08 05:02:00', 'Hong Kong Park', '29']
['2023-07-08 05:02:00', 'Shau Kei Wan', '28']
['2023-07-08 05:02:00', 'Kowloon City', '28']
['2023-07-08 05:02:00', 'Happy Valley', '29']
['2023-07-08 05:02:00', 'Wong Tai Sin', '29']
['2023-07-08 05:02:00', 'Stanley', '28']
['2023-07-08 05:02:00', 'Kwun To

['2023-07-30 04:02:00', 'Tai Mei Tuk', '28']
['2023-07-30 05:02:00', 'Hong Kong Observatory', '29']
['2023-07-30 05:02:00', "King's Park", '28']
['2023-07-30 05:02:00', 'Wong Chuk Hang', '29']
['2023-07-30 05:02:00', 'Ta Kwu Ling', '27']
['2023-07-30 05:02:00', 'Tai Po', '27']
['2023-07-30 05:02:00', 'Sha Tin', '28']
['2023-07-30 05:02:00', 'Tuen Mun', '27']
['2023-07-30 05:02:00', 'Tseung Kwan O', '28']
['2023-07-30 05:02:00', 'Sai Kung', '28']
['2023-07-30 05:02:00', 'Cheung Chau', '28']
['2023-07-30 05:02:00', 'Chek Lap Kok', '29']
['2023-07-30 05:02:00', 'Tsing Yi', '28']
['2023-07-30 05:02:00', 'Shek Kong', '29']
['2023-07-30 05:02:00', 'Tsuen Wan Ho Koon', '27']
['2023-07-30 05:02:00', 'Tsuen Wan Shing Mun Valley', '27']
['2023-07-30 05:02:00', 'Hong Kong Park', '28']
['2023-07-30 05:02:00', 'Shau Kei Wan', '29']
['2023-07-30 05:02:00', 'Kowloon City', '29']
['2023-07-30 05:02:00', 'Happy Valley', '29']
['2023-07-30 05:02:00', 'Wong Tai Sin', '28']
['2023-07-30 05:02:00', 'Stanle

['2023-08-10 04:02:00', 'Hong Kong Observatory', '29']
['2023-08-10 04:02:00', "King's Park", '29']
['2023-08-10 04:02:00', 'Wong Chuk Hang', '29']
['2023-08-10 04:02:00', 'Ta Kwu Ling', '27']
['2023-08-10 04:02:00', 'Lau Fau Shan', '29']
['2023-08-10 04:02:00', 'Tai Po', '28']
['2023-08-10 04:02:00', 'Sha Tin', '29']
['2023-08-10 04:02:00', 'Tuen Mun', '29']
['2023-08-10 04:02:00', 'Tseung Kwan O', '28']
['2023-08-10 04:02:00', 'Sai Kung', '29']
['2023-08-10 04:02:00', 'Cheung Chau', '28']
['2023-08-10 04:02:00', 'Chek Lap Kok', '30']
['2023-08-10 04:02:00', 'Tsing Yi', '29']
['2023-08-10 04:02:00', 'Shek Kong', '28']
['2023-08-10 04:02:00', 'Tsuen Wan Ho Koon', '27']
['2023-08-10 04:02:00', 'Tsuen Wan Shing Mun Valley', '28']
['2023-08-10 04:02:00', 'Hong Kong Park', '29']
['2023-08-10 04:02:00', 'Shau Kei Wan', '29']
['2023-08-10 04:02:00', 'Kowloon City', '29']
['2023-08-10 04:02:00', 'Happy Valley', '30']
['2023-08-10 04:02:00', 'Wong Tai Sin', '29']
['2023-08-10 04:02:00', 'Stanl

['2023-08-21 19:02:00', 'Cheung Chau', '27']
['2023-08-21 19:02:00', 'Chek Lap Kok', '30']
['2023-08-21 19:02:00', 'Tsing Yi', '29']
['2023-08-21 19:02:00', 'Shek Kong', '29']
['2023-08-21 19:02:00', 'Tsuen Wan Ho Koon', '27']
['2023-08-21 19:02:00', 'Tsuen Wan Shing Mun Valley', '28']
['2023-08-21 19:02:00', 'Hong Kong Park', '29']
['2023-08-21 19:02:00', 'Shau Kei Wan', '27']
['2023-08-21 19:02:00', 'Kowloon City', '29']
['2023-08-21 19:02:00', 'Happy Valley', '30']
['2023-08-21 19:02:00', 'Wong Tai Sin', '29']
['2023-08-21 19:02:00', 'Stanley', '28']
['2023-08-21 19:02:00', 'Kwun Tong', '28']
['2023-08-21 19:02:00', 'Sham Shui Po', '29']
['2023-08-21 19:02:00', 'Kai Tak Runway Park', '29']
['2023-08-21 19:02:00', 'Yuen Long Park', '29']
['2023-08-21 19:02:00', 'Tai Mei Tuk', '28']
['2023-08-21 20:02:00', 'Hong Kong Observatory', '30']
['2023-08-21 20:02:00', "King's Park", '29']
['2023-08-21 20:02:00', 'Wong Chuk Hang', '29']
['2023-08-21 20:02:00', 'Ta Kwu Ling', '28']
['2023-08-21

['2023-09-03 06:02:00', 'Happy Valley', '28']
['2023-09-03 06:02:00', 'Wong Tai Sin', '27']
['2023-09-03 06:02:00', 'Stanley', '27']
['2023-09-03 06:02:00', 'Kwun Tong', '26']
['2023-09-03 06:02:00', 'Sham Shui Po', '27']
['2023-09-03 06:02:00', 'Kai Tak Runway Park', '27']
['2023-09-03 06:02:00', 'Yuen Long Park', '27']
['2023-09-03 06:02:00', 'Tai Mei Tuk', '26']
['2023-09-03 07:02:00', 'Hong Kong Observatory', '27']
['2023-09-03 07:02:00', "King's Park", '27']
['2023-09-03 07:02:00', 'Wong Chuk Hang', '28']
['2023-09-03 07:02:00', 'Ta Kwu Ling', '26']
['2023-09-03 07:02:00', 'Lau Fau Shan', '27']
['2023-09-03 07:02:00', 'Tai Po', '27']
['2023-09-03 07:02:00', 'Sha Tin', '28']
['2023-09-03 07:02:00', 'Tuen Mun', '27']
['2023-09-03 07:02:00', 'Tseung Kwan O', '27']
['2023-09-03 07:02:00', 'Sai Kung', '27']
['2023-09-03 07:02:00', 'Chek Lap Kok', '28']
['2023-09-03 07:02:00', 'Tsing Yi', '28']
['2023-09-03 07:02:00', 'Shek Kong', '27']
['2023-09-03 07:02:00', 'Tsuen Wan Ho Koon', '25']

['2023-09-18 14:02:00', 'Hong Kong Park', '32']
['2023-09-18 14:02:00', 'Shau Kei Wan', '29']
['2023-09-18 14:02:00', 'Kowloon City', '32']
['2023-09-18 14:02:00', 'Happy Valley', '33']
['2023-09-18 14:02:00', 'Wong Tai Sin', '32']
['2023-09-18 14:02:00', 'Stanley', '32']
['2023-09-18 14:02:00', 'Kwun Tong', '32']
['2023-09-18 14:02:00', 'Sham Shui Po', '33']
['2023-09-18 14:02:00', 'Kai Tak Runway Park', '30']
['2023-09-18 14:02:00', 'Yuen Long Park', '34']
['2023-09-18 14:02:00', 'Tai Mei Tuk', '31']
['2023-09-18 15:17:00', 'Hong Kong Observatory', '32']
['2023-09-18 15:17:00', "King's Park", '31']
['2023-09-18 15:17:00', 'Wong Chuk Hang', '31']
['2023-09-18 15:17:00', 'Ta Kwu Ling', '33']
['2023-09-18 15:17:00', 'Lau Fau Shan', '30']
['2023-09-18 15:17:00', 'Tai Po', '32']
['2023-09-18 15:17:00', 'Sha Tin', '32']
['2023-09-18 15:17:00', 'Tuen Mun', '32']
['2023-09-18 15:17:00', 'Tseung Kwan O', '32']
['2023-09-18 15:17:00', 'Sai Kung', '30']
['2023-09-18 15:17:00', 'Cheung Chau', '3

['2023-10-04 01:02:00', 'Cheung Chau', '27']
['2023-10-04 01:02:00', 'Chek Lap Kok', '29']
['2023-10-04 01:02:00', 'Tsing Yi', '27']
['2023-10-04 01:02:00', 'Tsuen Wan Ho Koon', '27']
['2023-10-04 01:02:00', 'Tsuen Wan Shing Mun Valley', '27']
['2023-10-04 01:02:00', 'Hong Kong Park', '28']
['2023-10-04 01:02:00', 'Shau Kei Wan', '28']
['2023-10-04 01:02:00', 'Kowloon City', '28']
['2023-10-04 01:02:00', 'Happy Valley', '29']
['2023-10-04 01:02:00', 'Wong Tai Sin', '28']
['2023-10-04 01:02:00', 'Stanley', '28']
['2023-10-04 01:02:00', 'Kwun Tong', '28']
['2023-10-04 01:02:00', 'Sham Shui Po', '28']
['2023-10-04 01:02:00', 'Kai Tak Runway Park', '29']
['2023-10-04 01:02:00', 'Yuen Long Park', '27']
['2023-10-04 01:02:00', 'Tai Mei Tuk', '27']
['2023-10-04 02:02:00', 'Hong Kong Observatory', '29']
['2023-10-04 02:02:00', "King's Park", '28']
['2023-10-04 02:02:00', 'Wong Chuk Hang', '27']
['2023-10-04 02:02:00', 'Ta Kwu Ling', '26']
['2023-10-04 02:02:00', 'Lau Fau Shan', '28']
['2023-10

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [68]:
def xml_rain_parr(f):
    xml_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_info_rain_par.csv'
    file_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2023/' + f
    file_name = f[0:13]
    write_xml_rain(file_path, xml_path)

In [69]:
Parallel(n_jobs=-1)(delayed(xml_rain_parr)(f) for f in path_list6)

['2022-08-10 04:31:00', 'Kwai Tsing', '1 to 4']
['2022-08-10 04:31:00', 'Eastern District', '1 to 3']
['2022-08-10 04:31:00', 'Central &amp; Western District', '1 to 2']
['2022-08-10 04:31:00', 'Kowloon City', '1 to 2']
['2022-08-10 04:31:00', 'Wan Chai', '1 to 2']
['2022-08-10 04:31:00', 'Tsuen Wan', '0 to 5']
['2022-08-10 04:31:00', 'Islands District', '0 to 4']
['2022-08-10 04:31:00', 'North District', '0 to 4']
['2022-08-10 04:31:00', 'Sha Tin', '0 to 3']
['2022-08-10 04:31:00', 'Yuen Long', '0 to 3']
['2022-08-10 04:31:00', 'Wong Tai Sin', '0 to 2']
['2022-08-10 04:31:00', 'Kwun Tong', '0 to 1']
['2022-08-10 04:31:00', 'Southern District', '0 to 1']
['2022-08-10 04:31:00', 'Tai Po', '0 to 1']
['2022-08-10 04:31:00', 'Tuen Mun', '0 to 1']
['2022-08-10 05:02:00', 'Yuen Long', '1 to 4']
['2022-08-10 05:02:00', 'Islands District', '0 to 3']
['2022-08-10 05:02:00', 'North District', '0 to 3']
['2022-08-10 05:02:00', 'Tsuen Wan', '0 to 3']
['2022-08-10 05:02:00', 'Tuen Mun', '0 to 3']
[

['2022-08-04 23:02:00', 'Sha Tin', '0 to 6']
['2022-08-04 23:02:00', 'Tsuen Wan', '0 to 6']
['2022-08-04 23:02:00', 'Eastern District', '0 to 5']
['2022-08-04 23:02:00', 'Islands District', '0 to 4']
['2022-08-04 23:02:00', 'Tai Po', '0 to 4']
['2022-08-04 23:02:00', 'Yuen Long', '0 to 4']
['2022-08-04 23:02:00', 'Southern District', '0 to 3']
['2022-08-04 23:02:00', 'North District', '0 to 2']
['2022-08-04 23:02:00', 'Sham Shui Po', '0 to 2']
['2022-08-04 23:02:00', 'Central &amp; Western District', '0 to 1']
['2022-08-04 23:02:00', 'Kwai Tsing', '0 to 1']
['2022-08-04 23:02:00', 'Kwun Tong', '0 to 1']
['2022-08-04 23:02:00', 'Tuen Mun', '0 to 1']
['2022-08-05 00:02:00', 'Sha Tin', '0 to 7']
['2022-08-05 00:02:00', 'Wong Tai Sin', '0 to 5']
['2022-08-05 00:02:00', 'Tsuen Wan', '0 to 3']
['2022-08-05 00:02:00', 'Yuen Long', '0 to 3']
['2022-08-05 00:02:00', 'Islands District', '0 to 2']
['2022-08-05 00:02:00', 'North District', '0 to 2']
['2022-08-05 00:02:00', 'Tai Po', '0 to 2']
['20

['2023-03-28 04:02:00', 'Sai Kung', '0 to 1']
['2023-03-29 01:02:00', 'Central & Western District', '0 to 1']
['2023-03-29 01:02:00', 'Sha Tin', '0 to 1']
['2023-03-29 01:02:00', 'Tsuen Wan', '0 to 1']
['2023-03-29 02:02:00', 'Eastern District', '0 to 1']
['2023-03-29 02:02:00', 'Kwun Tong', '0 to 1']
['2023-03-29 02:02:00', 'Sai Kung', '0 to 1']
['2023-03-29 02:02:00', 'Southern District', '0 to 1']
['2023-03-29 02:02:00', 'Wan Chai', '0 to 1']
['2023-03-29 02:02:00', 'Wong Tai Sin', '0 to 1']
['2023-03-29 03:02:00', 'Sai Kung', '0 to 1']
['2023-03-29 03:02:00', 'Tsuen Wan', '0 to 1']
['2023-03-29 04:02:00', 'Sai Kung', '0 to 1']
['2023-03-29 04:02:00', 'Sha Tin', '0 to 1']
['2023-03-29 04:02:00', 'Tsuen Wan', '0 to 1']
['2023-03-29 04:02:00', 'Wong Tai Sin', '0 to 1']
['2023-03-29 05:02:00', 'Kwun Tong', '0 to 1']
['2023-03-29 05:02:00', 'Wan Chai', '0 to 1']
['2023-03-29 07:02:00', 'Eastern District', '0 to 1']
['2023-03-29 07:02:00', 'Sai Kung', '0 to 1']
['2023-03-29 07:02:00', 'T

['2023-09-01 10:02:00', 'Islands District', '0 to 1']
['2023-09-01 10:02:00', 'Kowloon City', '0 to 1']
['2023-09-01 10:02:00', 'Sai Kung', '0 to 1']
['2023-09-01 10:02:00', 'Sha Tin', '0 to 1']
['2023-09-01 10:02:00', 'Southern District', '0 to 1']
['2023-09-01 10:02:00', 'Tai Po', '0 to 1']
['2023-09-01 10:02:00', 'Tsuen Wan', '0 to 1']
['2023-09-01 10:02:00', 'Wan Chai', '0 to 1']
['2023-09-01 10:02:00', 'Yau Tsim Mong', '0 to 1']
['2023-09-01 12:02:00', 'Sai Kung', '0 to 1']
['2023-09-01 12:02:00', 'Tai Po', '0 to 1']
['2023-09-01 13:02:00', 'Sai Kung', '1 to 2']
['2023-09-01 13:02:00', 'Kwun Tong', '1']
['2023-09-01 13:02:00', 'Sha Tin', '1']
['2023-09-01 13:02:00', 'Wong Tai Sin', '1']
['2023-09-01 13:02:00', 'Tai Po', '0 to 2']
['2023-09-01 13:02:00', 'Eastern District', '0 to 1']
['2023-09-01 13:02:00', 'Islands District', '0 to 1']
['2023-09-01 13:02:00', 'North District', '0 to 1']
['2023-09-01 13:02:00', 'Southern District', '0 to 1']
['2023-09-01 13:02:00', 'Tsuen Wan', '0 

['2023-07-17 04:02:00', 'Wong Tai Sin', '1 to 3']
['2023-07-17 04:02:00', 'Sai Kung', '0 to 8']
['2023-07-17 04:02:00', 'Tsuen Wan', '0 to 2']
['2023-07-17 04:02:00', 'Islands District', '0 to 12']
['2023-07-17 04:02:00', 'North District', '0 to 1']
['2023-07-17 04:02:00', 'Sha Tin', '0 to 1']
['2023-07-17 04:02:00', 'Tai Po', '0 to 1']
['2023-07-17 05:02:00', 'North District', '5 to 15']
['2023-07-17 05:02:00', 'Sha Tin', '5 to 15']
['2023-07-17 05:02:00', 'Yuen Long', '4 to 17']
['2023-07-17 05:02:00', 'Tai Po', '4 to 15']
['2023-07-17 05:02:00', 'Central & Western District', '2 to 4']
['2023-07-17 05:02:00', 'Wong Tai Sin', '2 to 4']
['2023-07-17 05:02:00', 'Wan Chai', '2 to 3']
['2023-07-17 05:02:00', 'Tsuen Wan', '2 to 12']
['2023-07-17 05:02:00', 'Tuen Mun', '1 to 5']
['2023-07-17 05:02:00', 'Kwai Tsing', '1 to 4']
['2023-07-17 05:02:00', 'Kowloon City', '1 to 3']
['2023-07-17 05:02:00', 'Kwun Tong', '1 to 2']
['2023-07-17 05:02:00', 'Sham Shui Po', '1 to 2']
['2023-07-17 05:02:0

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [76]:
def xml_uv_parr(f):
    xml_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_info_uv_par.csv'
    file_path = '/Users/lpl/Desktop/bigdata /bigdata1/CS5488-Group8-main/data_2022/' + f
    file_name = f[0:13]
    write_xml_uv(file_path, xml_path)

In [77]:
Parallel(n_jobs=-1)(delayed(xml_uv_parr)(f) for f in path_list5)

['2019-08-27 14:02:00', '6', 'high']
['2019-08-27 14:02:00', '6', 'high']
['2019-08-27 14:02:00', '6', 'high']
['2019-08-27 15:02:00', '6', 'high']
['2019-08-27 16:02:00', '5', 'moderate']
['2019-08-27 16:02:00', '5', 'moderate']
['2019-08-27 17:02:00', '2', 'low']
['2019-08-27 17:16:00', '2', 'low']
['2019-08-27 17:16:00', '2', 'low']
['2019-08-27 17:16:00', '2', 'low']
['2019-08-27 18:02:00', '0.5', 'low']
['2019-08-27 18:02:00', '0.5', 'low']
['2019-08-28 08:02:00', '0.6', 'low']
['2019-08-28 08:02:00', '0.6', 'low']
['2019-08-28 08:02:00', '0.6', 'low']
['2019-08-28 09:02:00', '2', 'low']
['2019-08-28 09:02:00', '2', 'low']
['2019-08-28 10:02:00', '5', 'moderate']
['2019-08-28 10:02:00', '5', 'moderate']
['2019-08-28 11:02:00', '8', 'very high']
['2019-08-28 11:02:00', '8', 'very high']
['2019-08-28 11:02:00', '8', 'very high']
['2019-08-28 11:56:00', '8', 'very high']
['2019-08-28 12:06:00', '9', 'very high']
['2019-08-28 12:06:00', '9', 'very high']
['2019-08-28 12:06:00', '9', '

['2019-10-23 10:02:00', '3', 'moderate']
['2019-10-23 10:02:00', '3', 'moderate']
['2019-10-23 11:02:00', '5', 'moderate']
['2019-10-23 11:02:00', '5', 'moderate']
['2019-10-23 11:02:00', '5', 'moderate']
['2019-10-23 11:02:00', '5', 'moderate']
['2019-10-23 12:02:00', '7', 'high']
['2019-10-23 13:02:00', '7', 'high']
['2019-10-23 13:02:00', '7', 'high']
['2019-10-23 14:02:00', '6', 'high']
['2019-10-23 14:02:00', '6', 'high']
['2019-10-23 15:02:00', '4', 'moderate']
['2019-10-23 16:02:00', '2', 'low']
['2019-10-23 16:02:00', '2', 'low']
['2019-10-23 17:02:00', '0.6', 'low']
['2019-10-23 17:02:00', '0.6', 'low']
['2019-10-23 18:02:00', '0.1', 'low']
['2019-10-23 18:02:00', '0.1', 'low']
['2019-10-23 18:02:00', '0.1', 'low']
['2019-10-24 08:02:00', '0.3', 'low']
['2019-10-24 08:02:00', '0.3', 'low']
['2019-10-24 09:02:00', '1', 'low']
['2019-10-24 10:02:00', '3', 'moderate']
['2019-10-24 11:02:00', '5', 'moderate']
['2019-10-24 11:02:00', '5', 'moderate']
['2019-10-24 11:02:00', '5', 'm

['2019-09-26 10:02:00', '2', 'low']
['2019-09-26 11:02:00', '4', 'moderate']
['2019-09-26 11:02:00', '4', 'moderate']
['2019-09-26 12:02:00', '6', 'high']
['2019-09-26 12:02:00', '6', 'high']
['2019-09-26 13:02:00', '8', 'very high']
['2019-09-26 13:02:00', '8', 'very high']
['2019-09-26 13:02:00', '8', 'very high']
['2019-09-26 14:02:00', '7', 'high']
['2019-09-26 14:02:00', '7', 'high']
['2019-09-26 15:02:00', '4', 'moderate']
['2019-09-26 15:02:00', '4', 'moderate']
['2019-09-26 15:02:00', '4', 'moderate']
['2019-09-26 15:02:00', '4', 'moderate']
['2019-09-26 16:02:00', '1', 'low']
['2019-09-26 16:02:00', '1', 'low']
['2019-09-26 16:02:00', '1', 'low']
['2019-09-26 17:02:00', '0.9', 'low']
['2019-09-26 17:02:00', '0.9', 'low']
['2019-09-26 17:02:00', '0.9', 'low']
['2019-09-26 17:02:00', '0.9', 'low']
['2019-09-26 18:02:00', '0.2', 'low']
['2019-09-26 18:02:00', '0.2', 'low']
['2019-09-26 18:02:00', '0.2', 'low']
['2019-09-26 18:02:00', '0.2', 'low']
['2019-10-27 08:02:00', '0.2', '

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,