In [1]:
pip install pvlib -q

Note: you may need to restart the kernel to use updated packages.


In [2]:
import math
import pytz
import pvlib

import numpy as np
import pandas as pd

from tqdm import tqdm
from datetime import datetime

In [3]:
tqdm.pandas()

In [4]:
additional = [2, 4, 7, 8, 9, 10, 12] # training set 中有額外資料的 Location code

In [5]:
def dms_to_decimal(degrees, minutes, seconds): # 把經緯度(度:角分:角秒)轉成十進位度數
    return round((degrees + (minutes/60) + (seconds/3600)), 4)

def get_distance_from_latlon_in_m(lat1, lon1, lat2, lon2): # Haversine function 計算兩個經緯度間距離(公尺)
    radius = 6371e3  # 地球半徑(m)
    
    # 轉換為弧度
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
        
    # Haversine 公式
    delta_lat = math.radians(lat2 - lat1)
    delta_lon = math.radians(lon2 - lon1)
    a = math.sin(delta_lat/2) * math.sin(delta_lat/2) + \
        math.cos(lat1_rad) * math.cos(lat2_rad) * \
        math.sin(delta_lon/2) * math.sin(delta_lon/2)
    
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    
    distance = radius * c
    return distance


station_exp_info = { # Lat, Lon, 面板朝向, 第一個測站, 第二個測站
     1: ['23/53/58', '121/32/40',  181, 'C0T9E0', 'C0Z100'],
     2: ['23/53/59', '121/32/41',  175, 'C0T9E0', 'C0Z100'],
     3: ['23/53/59', '121/32/42',  180, 'C0T9E0', 'C0Z100'],
     4: ['23/53/58', '121/32/40',  161, 'C0T9E0', 'C0Z100'],
     5: ['23/53/58', '121/32/41',  208, 'C0T9E0', 'C0Z100'],
     6: ['23/53/58', '121/32/40',  208, 'C0T9E0', 'C0Z100'],
     7: ['23/53/58', '121/32/40',  172, 'C0T9E0', 'C0Z100'],
     8: ['23/53/59', '121/32/42',  219, 'C0T9E0', 'C0Z100'],
     9: ['23/53/58', '121/32/40',  151, 'C0T9E0', 'C0Z100'],
    10: ['23/53/58', '121/32/40',  223, 'C0T9E0', 'C0Z100'],
    11: ['23/53/59', '121/32/41',  131, 'C0T9E0', 'C0Z100'],
    12: ['23/53/59', '121/32/41',  298, 'C0T9E0', 'C0Z100'],
    13: ['23/53/52', '121/32/22',  249, 'C0T9E0', 'C0Z100'],
    14: ['23/53/52', '121/32/22',  197, 'C0T9E0', 'C0Z100'],
    15: ['24/00/33', '121/37/02',  127, '466990', '466990'],
    16: ['24/00/32', '121/37/02',   82, '466990', '466990'],
    17: [ 23.9751  ,  121.6133  , None, '466990', '466990'] # LC 17 的經緯度為觀測站數據
}

for id in station_exp_info.keys():
    if (id == 17):
        continue
    
    else:
        lat_d, lat_m, lat_s = map(int, station_exp_info[id][0].split('/'))
        lon_d, lon_m, lon_s = map(int, station_exp_info[id][1].split('/'))

        lat = dms_to_decimal(lat_d, lat_m, lat_s)
        lon = dms_to_decimal(lon_d, lon_m, lon_s)

        station_exp_info[id][0] = lat
        station_exp_info[id][1] = lon

station_exp_info[1]

[23.8994, 121.5444, 181, 'C0T9E0', 'C0Z100']

In [6]:
cwb = pd.read_csv('/kaggle/input/ai-cup-cwb-data/cwb.csv')

temp = {}

for station_id in list(cwb['Station_ID'].unique()):
    temp[station_id] = cwb.loc[cwb['Station_ID']==station_id].reset_index(drop=True, inplace=False)
    
cwb = temp

print('cwb station id:', cwb.keys())
cwb['466990'].head()

cwb station id: dict_keys(['466990', 'C0T9E0', 'C0Z100'])


Unnamed: 0,Station_ID,lng.,lat.,altitude,Year,Month,Day,Hour,StnPres,SeaPres,...,Visb,UVI,Cloud Amount,TxSoil0cm,TxSoil5cm,TxSoil10cm,TxSoil20cm,TxSoil30cm,TxSoil50cm,TxSoil100cm
0,466990,121.613275,23.975128,16.1,2024,1,1,1,1020.2,1022.5,...,,0.0,,,,,,,,
1,466990,121.613275,23.975128,16.1,2024,1,1,2,1019.9,1022.2,...,,0.0,,,,,,,,
2,466990,121.613275,23.975128,16.1,2024,1,1,3,1019.5,1021.8,...,,0.0,,,,,,,,
3,466990,121.613275,23.975128,16.1,2024,1,1,4,1019.3,1021.6,...,,0.0,,,,,,,,
4,466990,121.613275,23.975128,16.1,2024,1,1,5,1019.4,1021.7,...,,0.0,,17.8,18.5,19.2,19.7,20.8,21.6,22.5


In [7]:
dataset = []
for station_id in range(1, 18):
    
    if station_id in additional:
        d1 = pd.read_csv(f'/kaggle/input/ai-cup-original-training-set/L{station_id}_Train.csv')
        d2 = pd.read_csv(f'/kaggle/input/ai-cup-original-training-set/L{station_id}_Train_2.csv')
        
        data = pd.concat([d1, d2], ignore_index=True)
    else:
        data = pd.read_csv(f'/kaggle/input/ai-cup-original-training-set/L{station_id}_Train.csv')
    
    dataset.append(data)

dataset = pd.concat(dataset, ignore_index=True)

dataset.head()

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW)
0,1,2024-01-01 06:31:08.000,0.0,1016.5,17.5,86.8,25.0,0.0
1,1,2024-01-01 06:32:08.000,0.0,1016.53,17.5,86.7,28.33,0.0
2,1,2024-01-01 06:33:08.000,0.0,1016.57,17.5,86.7,32.5,0.0
3,1,2024-01-01 06:34:08.000,0.0,1016.58,17.5,86.7,39.17,0.0
4,1,2024-01-01 06:35:08.000,0.0,1016.59,17.5,86.7,45.83,0.0


In [8]:
def get_Location_info(id):
    lat = station_exp_info[id][0]
    lon = station_exp_info[id][1]
    direction = station_exp_info[id][2]

    return lat, lon, direction

dataset[['lat', 'lon', 'direction']] = dataset.progress_apply(lambda x: get_Location_info(x['LocationCode']), axis=1, result_type='expand')

print('新增 lat, lon, direction')

dataset.head()

100%|██████████| 1375028/1375028 [00:35<00:00, 38402.28it/s] 

新增 lat, lon, direction





Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,direction
0,1,2024-01-01 06:31:08.000,0.0,1016.5,17.5,86.8,25.0,0.0,23.8994,121.5444,181.0
1,1,2024-01-01 06:32:08.000,0.0,1016.53,17.5,86.7,28.33,0.0,23.8994,121.5444,181.0
2,1,2024-01-01 06:33:08.000,0.0,1016.57,17.5,86.7,32.5,0.0,23.8994,121.5444,181.0
3,1,2024-01-01 06:34:08.000,0.0,1016.58,17.5,86.7,39.17,0.0,23.8994,121.5444,181.0
4,1,2024-01-01 06:35:08.000,0.0,1016.59,17.5,86.7,45.83,0.0,23.8994,121.5444,181.0


In [9]:
def get_cwb_info(id, dt):
    station1  = station_exp_info[id][3]
    station2  = station_exp_info[id][4]
    
    date, time = dt.split(' ')
    m, d = map(int, date.split('-')[1:])
    h = int(time.split(':')[0])+1

    
    cwb3 = cwb['466990'][(cwb['466990']["Month"]==m) & (cwb['466990']["Day"]==d) & (cwb['466990']["Hour"]==h)]

    if station1 == '466990':
        cwb1 = cwb3
    else:
        cwb1 = cwb[station1][(cwb[station1]["Month"]==m) & (cwb[station1]["Day"]==d) & (cwb[station1]["Hour"]==h)]

    if station2 == '466990':
        cwb2 = cwb3
    else:
        cwb2 = cwb[station2][(cwb[station2]["Month"]==m) & (cwb[station2]["Day"]==d) & (cwb[station2]["Hour"]==h)]
    
    try:
        pres = float(cwb1['StnPres'].iloc[0])
        temp = float(cwb1['Temperature'].iloc[0])
        rh = float(cwb1['RH'].iloc[0])
        precp = float(cwb1['Precp'].iloc[0])
        rad = float(cwb2['GloblRad'].iloc[0])
        sun = float(cwb3['SunShine'].iloc[0])
        visb = float(cwb3['Visb'].iloc[0])
        uvi = float(cwb3['UVI'].iloc[0])
        cloud = float(cwb3['Cloud Amount'].iloc[0])
    except:
        print('sp:',cwb1['StnPres'])
        print('te:',cwb1['Temperature'])
        print('rh:',cwb1['RH'])
        print('pr:',cwb1['Precp'])
        print('gl:',cwb2['GloblRad'])
        print('ss:',cwb3['SunShine'])
        print('vi:',cwb3['Visb'])
        print('uv:',cwb3['UVI'])
        print('cl:',cwb3['Cloud'])
        print(id, dt, station1, station2, m, d, h)
    
    return (pres, temp, rh, precp, rad, sun, visb, uvi, cloud)
    

dataset[['pres_cwb', 'temp_cwb', 'rh_cwb', 'precp_cwb', 'rad_cwb', 'sun_cwb', 'visb_cwb', 'uvi_cwb', 'cloud_cwb']] = dataset.progress_apply(lambda x: get_cwb_info(x['LocationCode'], x['DateTime']), axis=1, result_type='expand')


print('新增 pres_cwb, temp_cwb, rh_cwb, precp_cwb, rad_cwb, sun_cwb, visb_cwb, uvi_cwb, cloud_cwb')

dataset

100%|██████████| 1375028/1375028 [44:01<00:00, 520.60it/s] 

新增 pres_cwb, temp_cwb, rh_cwb, precp_cwb, rad_cwb, sun_cwb, visb_cwb, uvi_cwb, cloud_cwb





Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,direction,pres_cwb,temp_cwb,rh_cwb,precp_cwb,rad_cwb,sun_cwb,visb_cwb,uvi_cwb,cloud_cwb
0,1,2024-01-01 06:31:08.000,0.0,1016.50,17.5,86.8,25.00,0.00,23.8994,121.5444,181.0,975.6,14.5,88.0,0.0,0.0,0.0,,0.00,
1,1,2024-01-01 06:32:08.000,0.0,1016.53,17.5,86.7,28.33,0.00,23.8994,121.5444,181.0,975.6,14.5,88.0,0.0,0.0,0.0,,0.00,
2,1,2024-01-01 06:33:08.000,0.0,1016.57,17.5,86.7,32.50,0.00,23.8994,121.5444,181.0,975.6,14.5,88.0,0.0,0.0,0.0,,0.00,
3,1,2024-01-01 06:34:08.000,0.0,1016.58,17.5,86.7,39.17,0.00,23.8994,121.5444,181.0,975.6,14.5,88.0,0.0,0.0,0.0,,0.00,
4,1,2024-01-01 06:35:08.000,0.0,1016.59,17.5,86.7,45.83,0.00,23.8994,121.5444,181.0,975.6,14.5,88.0,0.0,0.0,0.0,,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1375023,17,2024-07-12 15:26:46.000,0.0,1004.05,36.5,65.4,12315.00,35.91,23.9751,121.6133,,1002.4,31.6,75.0,0.0,1.8,0.9,,4.14,
1375024,17,2024-07-12 15:27:47.000,0.0,1004.08,36.5,64.7,12692.50,38.23,23.9751,121.6133,,1002.4,31.6,75.0,0.0,1.8,0.9,,4.14,
1375025,17,2024-07-12 15:28:47.000,0.0,1004.10,36.5,64.5,12960.00,39.95,23.9751,121.6133,,1002.4,31.6,75.0,0.0,1.8,0.9,,4.14,
1375026,17,2024-07-12 15:29:47.000,0.0,1004.11,36.4,64.9,12751.67,38.51,23.9751,121.6133,,1002.4,31.6,75.0,0.0,1.8,0.9,,4.14,


In [10]:
def get_solar_radiation(lat, lon, datetime, temperature=12, pres=1013.25, tz='Asia/Taipei'):
    # 建立位置物件
    location = pvlib.location.Location(
       latitude=lat,
       longitude=lon,
       tz=tz
    )
    
    # 處理時間
    time = pd.Timestamp(datetime, tz=tz)
    times = pd.DatetimeIndex([time])
    
    # 計算太陽位置
    solar_position = location.get_solarposition(
       times,
       temperature=temperature,
       pressure=pres
    )
    
    # 計算晴空輻射
    clearsky = location.get_clearsky(
       times,
       pressure=pres
    )
    
    # 整理結果
    return (
        solar_position['apparent_zenith'].iloc[0],
        solar_position['zenith'].iloc[0],
        solar_position['apparent_elevation'].iloc[0],
        solar_position['elevation'].iloc[0],
        solar_position['azimuth'].iloc[0],
        clearsky['ghi'].iloc[0],
        clearsky['dni'].iloc[0],
        clearsky['dhi'].iloc[0]
    )

dataset[['apparent_zenith', 'zenith', 'apparent_elevation', 'elevation', 'azimuth', 'ghi', 'dni', 'dhi']] = dataset.progress_apply(
    lambda x: get_solar_radiation(x['lat'], x['lon'], x['DateTime'], x['temp_cwb'], x['pres_cwb']), axis=1, result_type='expand')


print('新增 apparent_zenith, zenith, apparent_elevation, elevation, azimuth, ghi, dni, dhi')

dataset

100%|██████████| 1375028/1375028 [6:51:42<00:00, 55.66it/s]


新增 apparent_zenith, zenith, apparent_elevation, elevation, azimuth, ghi, dni, dhi


Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,uvi_cwb,cloud_cwb,apparent_zenith,zenith,apparent_elevation,elevation,azimuth,ghi,dni,dhi
0,1,2024-01-01 06:31:08.000,0.0,1016.50,17.5,86.8,25.00,0.00,23.8994,121.5444,...,0.00,,91.917932,91.917932,-1.917932,-1.917932,114.450417,0.000000,0.000000,0.000000
1,1,2024-01-01 06:32:08.000,0.0,1016.53,17.5,86.7,28.33,0.00,23.8994,121.5444,...,0.00,,91.709992,91.709992,-1.709992,-1.709992,114.548622,0.000000,0.000000,0.000000
2,1,2024-01-01 06:33:08.000,0.0,1016.57,17.5,86.7,32.50,0.00,23.8994,121.5444,...,0.00,,91.502216,91.502216,-1.502216,-1.502216,114.647162,0.000000,0.000000,0.000000
3,1,2024-01-01 06:34:08.000,0.0,1016.58,17.5,86.7,39.17,0.00,23.8994,121.5444,...,0.00,,91.294603,91.294603,-1.294603,-1.294603,114.746037,0.000000,0.000000,0.000000
4,1,2024-01-01 06:35:08.000,0.0,1016.59,17.5,86.7,45.83,0.00,23.8994,121.5444,...,0.00,,91.087156,91.087156,-1.087156,-1.087156,114.845251,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1375023,17,2024-07-12 15:26:46.000,0.0,1004.05,36.5,65.4,12315.00,35.91,23.9751,121.6133,...,4.14,,47.554966,47.555136,42.445034,42.444864,278.396905,594.551396,677.751829,137.148378
1375024,17,2024-07-12 15:27:47.000,0.0,1004.08,36.5,64.7,12692.50,38.23,23.9751,121.6133,...,4.14,,47.784701,47.784873,42.215299,42.215127,278.469019,591.250877,676.329372,136.812645
1375025,17,2024-07-12 15:28:47.000,0.0,1004.10,36.5,64.5,12960.00,39.95,23.9751,121.6133,...,4.14,,48.010628,48.010801,41.989372,41.989199,278.539940,587.994166,674.916117,136.480078
1375026,17,2024-07-12 15:29:47.000,0.0,1004.11,36.4,64.9,12751.67,38.51,23.9751,121.6133,...,4.14,,48.236514,48.236688,41.763486,41.763312,278.610851,584.727322,673.488707,136.145177


In [11]:
dataset

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,uvi_cwb,cloud_cwb,apparent_zenith,zenith,apparent_elevation,elevation,azimuth,ghi,dni,dhi
0,1,2024-01-01 06:31:08.000,0.0,1016.50,17.5,86.8,25.00,0.00,23.8994,121.5444,...,0.00,,91.917932,91.917932,-1.917932,-1.917932,114.450417,0.000000,0.000000,0.000000
1,1,2024-01-01 06:32:08.000,0.0,1016.53,17.5,86.7,28.33,0.00,23.8994,121.5444,...,0.00,,91.709992,91.709992,-1.709992,-1.709992,114.548622,0.000000,0.000000,0.000000
2,1,2024-01-01 06:33:08.000,0.0,1016.57,17.5,86.7,32.50,0.00,23.8994,121.5444,...,0.00,,91.502216,91.502216,-1.502216,-1.502216,114.647162,0.000000,0.000000,0.000000
3,1,2024-01-01 06:34:08.000,0.0,1016.58,17.5,86.7,39.17,0.00,23.8994,121.5444,...,0.00,,91.294603,91.294603,-1.294603,-1.294603,114.746037,0.000000,0.000000,0.000000
4,1,2024-01-01 06:35:08.000,0.0,1016.59,17.5,86.7,45.83,0.00,23.8994,121.5444,...,0.00,,91.087156,91.087156,-1.087156,-1.087156,114.845251,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1375023,17,2024-07-12 15:26:46.000,0.0,1004.05,36.5,65.4,12315.00,35.91,23.9751,121.6133,...,4.14,,47.554966,47.555136,42.445034,42.444864,278.396905,594.551396,677.751829,137.148378
1375024,17,2024-07-12 15:27:47.000,0.0,1004.08,36.5,64.7,12692.50,38.23,23.9751,121.6133,...,4.14,,47.784701,47.784873,42.215299,42.215127,278.469019,591.250877,676.329372,136.812645
1375025,17,2024-07-12 15:28:47.000,0.0,1004.10,36.5,64.5,12960.00,39.95,23.9751,121.6133,...,4.14,,48.010628,48.010801,41.989372,41.989199,278.539940,587.994166,674.916117,136.480078
1375026,17,2024-07-12 15:29:47.000,0.0,1004.11,36.4,64.9,12751.67,38.51,23.9751,121.6133,...,4.14,,48.236514,48.236688,41.763486,41.763312,278.610851,584.727322,673.488707,136.145177


In [12]:
def get_minutes_of_day(datetime_str):
    datetime_str = str(datetime_str)
    dt = datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S')
    num_of_min = dt.hour * 60 + dt.minute
    
    return num_of_min # 直接回傳是第幾分鐘

dataset['DateTime'] = pd.to_datetime(dataset['DateTime'])
dataset['num_of_min'] = dataset['DateTime'].progress_apply(get_minutes_of_day)

print('計算資料為當天的第幾分鐘 num_of_min')

100%|██████████| 1375028/1375028 [00:20<00:00, 66399.26it/s]

計算資料為當天的第幾分鐘 num_of_min





In [13]:
dataset['day_of_year'] = dataset['DateTime'].dt.dayofyear

print('計算資料為當年的第幾天 day_of_year')

計算資料為當年的第幾天 day_of_year


In [14]:
def split_time_feature(datetime_str):
    datetime_str = str(datetime_str)
    dt = datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S')
    
    return (dt.month, dt.day, dt.hour, dt.minute)

dataset['DateTime'] = pd.to_datetime(dataset['DateTime'])
dataset[['month', 'day', 'hour', 'min']] = dataset.progress_apply(lambda x: split_time_feature(x['DateTime']), axis=1, result_type='expand')

print('拆分每個時間特徵 month, day, hour, min')

100%|██████████| 1375028/1375028 [01:01<00:00, 22345.04it/s]

拆分每個時間特徵 month, day, hour, min





In [15]:
dataset['hour_sin'] = np.sin(dataset['hour'] * (2 * np.pi / 24))
dataset['hour_cos'] = np.cos(dataset['hour'] * (2 * np.pi / 24))

print('新增循環時間特徵 hour_sin, hour_cos')

新增循環時間特徵 hour_sin, hour_cos


In [16]:
dataset

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,dni,dhi,num_of_min,day_of_year,month,day,hour,min,hour_sin,hour_cos
0,1,2024-01-01 06:31:08,0.0,1016.50,17.5,86.8,25.00,0.00,23.8994,121.5444,...,0.000000,0.000000,391,1,1,1,6,31,1.000000,6.123234e-17
1,1,2024-01-01 06:32:08,0.0,1016.53,17.5,86.7,28.33,0.00,23.8994,121.5444,...,0.000000,0.000000,392,1,1,1,6,32,1.000000,6.123234e-17
2,1,2024-01-01 06:33:08,0.0,1016.57,17.5,86.7,32.50,0.00,23.8994,121.5444,...,0.000000,0.000000,393,1,1,1,6,33,1.000000,6.123234e-17
3,1,2024-01-01 06:34:08,0.0,1016.58,17.5,86.7,39.17,0.00,23.8994,121.5444,...,0.000000,0.000000,394,1,1,1,6,34,1.000000,6.123234e-17
4,1,2024-01-01 06:35:08,0.0,1016.59,17.5,86.7,45.83,0.00,23.8994,121.5444,...,0.000000,0.000000,395,1,1,1,6,35,1.000000,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1375023,17,2024-07-12 15:26:46,0.0,1004.05,36.5,65.4,12315.00,35.91,23.9751,121.6133,...,677.751829,137.148378,926,194,7,12,15,26,-0.707107,-7.071068e-01
1375024,17,2024-07-12 15:27:47,0.0,1004.08,36.5,64.7,12692.50,38.23,23.9751,121.6133,...,676.329372,136.812645,927,194,7,12,15,27,-0.707107,-7.071068e-01
1375025,17,2024-07-12 15:28:47,0.0,1004.10,36.5,64.5,12960.00,39.95,23.9751,121.6133,...,674.916117,136.480078,928,194,7,12,15,28,-0.707107,-7.071068e-01
1375026,17,2024-07-12 15:29:47,0.0,1004.11,36.4,64.9,12751.67,38.51,23.9751,121.6133,...,673.488707,136.145177,929,194,7,12,15,29,-0.707107,-7.071068e-01


In [17]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1375028 entries, 0 to 1375027
Data columns (total 36 columns):
 #   Column              Non-Null Count    Dtype         
---  ------              --------------    -----         
 0   LocationCode        1375028 non-null  int64         
 1   DateTime            1375028 non-null  datetime64[ns]
 2   WindSpeed(m/s)      1375028 non-null  float64       
 3   Pressure(hpa)       1375028 non-null  float64       
 4   Temperature(°C)     1375028 non-null  float64       
 5   Humidity(%)         1375028 non-null  float64       
 6   Sunlight(Lux)       1375028 non-null  float64       
 7   Power(mW)           1375028 non-null  float64       
 8   lat                 1375028 non-null  float64       
 9   lon                 1375028 non-null  float64       
 10  direction           1278386 non-null  float64       
 11  pres_cwb            1374548 non-null  float64       
 12  temp_cwb            1374548 non-null  float64       
 13  rh_cwb      

In [18]:
for i in range(1, 18):
    temp = dataset.query(f'LocationCode=={i}')

    print(temp.info())
    
    temp.to_csv(f'{i}.csv', index=False)

<class 'pandas.core.frame.DataFrame'>
Index: 101673 entries, 0 to 101672
Data columns (total 36 columns):
 #   Column              Non-Null Count   Dtype         
---  ------              --------------   -----         
 0   LocationCode        101673 non-null  int64         
 1   DateTime            101673 non-null  datetime64[ns]
 2   WindSpeed(m/s)      101673 non-null  float64       
 3   Pressure(hpa)       101673 non-null  float64       
 4   Temperature(°C)     101673 non-null  float64       
 5   Humidity(%)         101673 non-null  float64       
 6   Sunlight(Lux)       101673 non-null  float64       
 7   Power(mW)           101673 non-null  float64       
 8   lat                 101673 non-null  float64       
 9   lon                 101673 non-null  float64       
 10  direction           101673 non-null  float64       
 11  pres_cwb            101613 non-null  float64       
 12  temp_cwb            101613 non-null  float64       
 13  rh_cwb              101613 non-nul