In [1]:
pip install pvlib -q

Note: you may need to restart the kernel to use updated packages.


In [2]:
import math
import pytz
import pvlib

import numpy as np
import pandas as pd

from tqdm import tqdm
from datetime import datetime, timedelta

In [3]:
tqdm.pandas()

In [4]:
def serial2info(serial):
    id = int(serial[-2:])
    
    temp = [serial[:4], serial[4:6], serial[6:8], serial[8:10], serial[10:12]]
    
    date = f'{temp[0]}-{temp[1]}-{temp[2]}'
    time = f'{temp[3]}:{temp[4]}:00'
        
    return id, date, time

In [5]:
def dms_to_decimal(degrees, minutes, seconds): # 把經緯度(度:角分:角秒)轉成十進位度數
    return round((degrees + (minutes/60) + (seconds/3600)), 4)

def get_distance_from_latlon_in_m(lat1, lon1, lat2, lon2): # Haversine function 計算兩個經緯度間距離(公尺)
    radius = 6371e3  # 地球半徑(m)
    
    # 轉換為弧度
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
        
    # Haversine 公式
    delta_lat = math.radians(lat2 - lat1)
    delta_lon = math.radians(lon2 - lon1)
    a = math.sin(delta_lat/2) * math.sin(delta_lat/2) + \
        math.cos(lat1_rad) * math.cos(lat2_rad) * \
        math.sin(delta_lon/2) * math.sin(delta_lon/2)
    
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    
    distance = radius * c
    return distance


station_exp_info = { # Lat, Lon, 面板朝向, 第一個測站, 第二個測站
     1: ['23/53/58', '121/32/40',  181, 'C0T9E0', 'C0Z100'],
     2: ['23/53/59', '121/32/41',  175, 'C0T9E0', 'C0Z100'],
     3: ['23/53/59', '121/32/42',  180, 'C0T9E0', 'C0Z100'],
     4: ['23/53/58', '121/32/40',  161, 'C0T9E0', 'C0Z100'],
     5: ['23/53/58', '121/32/41',  208, 'C0T9E0', 'C0Z100'],
     6: ['23/53/58', '121/32/40',  208, 'C0T9E0', 'C0Z100'],
     7: ['23/53/58', '121/32/40',  172, 'C0T9E0', 'C0Z100'],
     8: ['23/53/59', '121/32/42',  219, 'C0T9E0', 'C0Z100'],
     9: ['23/53/58', '121/32/40',  151, 'C0T9E0', 'C0Z100'],
    10: ['23/53/58', '121/32/40',  223, 'C0T9E0', 'C0Z100'],
    11: ['23/53/59', '121/32/41',  131, 'C0T9E0', 'C0Z100'],
    12: ['23/53/59', '121/32/41',  298, 'C0T9E0', 'C0Z100'],
    13: ['23/53/52', '121/32/22',  249, 'C0T9E0', 'C0Z100'],
    14: ['23/53/52', '121/32/22',  197, 'C0T9E0', 'C0Z100'],
    15: ['24/00/33', '121/37/02',  127, '466990', '466990'],
    16: ['24/00/32', '121/37/02',   82, '466990', '466990'],
    17: [ 23.9751  ,  121.6133  , None, '466990', '466990'] # LC 17 的經緯度為觀測站數據
}

for id in station_exp_info.keys():
    if (id == 17):
        continue
    
    else:
        lat_d, lat_m, lat_s = map(int, station_exp_info[id][0].split('/'))
        lon_d, lon_m, lon_s = map(int, station_exp_info[id][1].split('/'))

        lat = dms_to_decimal(lat_d, lat_m, lat_s)
        lon = dms_to_decimal(lon_d, lon_m, lon_s)

        station_exp_info[id][0] = lat
        station_exp_info[id][1] = lon

station_exp_info[1]

[23.8994, 121.5444, 181, 'C0T9E0', 'C0Z100']

In [6]:
testset = pd.read_csv('/kaggle/input/ai-cup-original-testset/upload(no answer).csv')

testset['序號'] = testset['序號'].astype('str')

testset

Unnamed: 0,序號,答案
0,20240117090001,
1,20240117091001,
2,20240117092001,
3,20240117093001,
4,20240117094001,
...,...,...
9595,20241002161012,
9596,20241002162012,
9597,20241002163012,
9598,20241002164012,


In [7]:
dataset = testset.drop(['答案'], axis=1)

temp = {'LocationCode': [], 'DateTime': [], 'WindSpeed(m/s)': [],
        'Pressure(hpa)': [], 'Temperature(°C)': [], 'Humidity(%)': [],
        'Sunlight(Lux)': [], 'Power(mW)': [],
       }

for q in range(0, len(dataset), 48):
    id, date, time = serial2info(dataset.iloc[q]['序號'])

    time = datetime.strptime(time, '%H:%M:%S')
    for i in range(48):
        for j in range(10):            
            temp['LocationCode'].append(id)
            temp['DateTime'].append(f'{date} {time.strftime("%H:%M:%S")}')
            
            temp['WindSpeed(m/s)'].append(None)
            temp['Pressure(hpa)'].append(None)
            temp['Temperature(°C)'].append(None)
            temp['Humidity(%)'].append(None)
            temp['Sunlight(Lux)'].append(None)
            temp['Power(mW)'].append(None)

            time = time + timedelta(minutes=1)
            
dataset = pd.DataFrame(temp)

dataset.head()

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW)
0,1,2024-01-17 09:00:00,,,,,,
1,1,2024-01-17 09:01:00,,,,,,
2,1,2024-01-17 09:02:00,,,,,,
3,1,2024-01-17 09:03:00,,,,,,
4,1,2024-01-17 09:04:00,,,,,,


In [8]:
def get_Location_info(id):
    lat = station_exp_info[id][0]
    lon = station_exp_info[id][1]
    direction = station_exp_info[id][2]

    return lat, lon, direction

dataset[['lat', 'lon', 'direction']] = dataset.progress_apply(lambda x: get_Location_info(x['LocationCode']), axis=1, result_type='expand')

print('新增 lat, lon, direction')

dataset.head()

100%|██████████| 96000/96000 [00:02<00:00, 38242.23it/s] 

新增 lat, lon, direction





Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,direction
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,181.0
1,1,2024-01-17 09:01:00,,,,,,,23.8994,121.5444,181.0
2,1,2024-01-17 09:02:00,,,,,,,23.8994,121.5444,181.0
3,1,2024-01-17 09:03:00,,,,,,,23.8994,121.5444,181.0
4,1,2024-01-17 09:04:00,,,,,,,23.8994,121.5444,181.0


In [9]:
cwb = pd.read_csv('/kaggle/input/ai-cup-cwb-data/cwb.csv')

temp = {}

for station_id in list(cwb['Station_ID'].unique()):
    temp[station_id] = cwb.loc[cwb['Station_ID']==station_id].reset_index(drop=True, inplace=False)
    
cwb = temp

print('cwb station id:', cwb.keys())
cwb['466990'].head()

cwb station id: dict_keys(['466990', 'C0T9E0', 'C0Z100'])


Unnamed: 0,Station_ID,lng.,lat.,altitude,Year,Month,Day,Hour,StnPres,SeaPres,...,Visb,UVI,Cloud Amount,TxSoil0cm,TxSoil5cm,TxSoil10cm,TxSoil20cm,TxSoil30cm,TxSoil50cm,TxSoil100cm
0,466990,121.613275,23.975128,16.1,2024,1,1,1,1020.2,1022.5,...,,0.0,,,,,,,,
1,466990,121.613275,23.975128,16.1,2024,1,1,2,1019.9,1022.2,...,,0.0,,,,,,,,
2,466990,121.613275,23.975128,16.1,2024,1,1,3,1019.5,1021.8,...,,0.0,,,,,,,,
3,466990,121.613275,23.975128,16.1,2024,1,1,4,1019.3,1021.6,...,,0.0,,,,,,,,
4,466990,121.613275,23.975128,16.1,2024,1,1,5,1019.4,1021.7,...,,0.0,,17.8,18.5,19.2,19.7,20.8,21.6,22.5


In [10]:
def get_cwb_info(id, dt):
    station1  = station_exp_info[id][3]
    station2  = station_exp_info[id][4]
    
    date, time = dt.split(' ')
    m, d = map(int, date.split('-')[1:])
    h = int(time.split(':')[0])+1

    
    cwb3 = cwb['466990'][(cwb['466990']["Month"]==m) & (cwb['466990']["Day"]==d) & (cwb['466990']["Hour"]==h)]

    if station1 == '466990':
        cwb1 = cwb3
    else:
        cwb1 = cwb[station1][(cwb[station1]["Month"]==m) & (cwb[station1]["Day"]==d) & (cwb[station1]["Hour"]==h)]

    if station2 == '466990':
        cwb2 = cwb3
    else:
        cwb2 = cwb[station2][(cwb[station2]["Month"]==m) & (cwb[station2]["Day"]==d) & (cwb[station2]["Hour"]==h)]
    
    try:
        pres = float(cwb1['StnPres'].iloc[0])
        temp = float(cwb1['Temperature'].iloc[0])
        rh = float(cwb1['RH'].iloc[0])
        precp = float(cwb1['Precp'].iloc[0])
        rad = float(cwb2['GloblRad'].iloc[0])
        sun = float(cwb3['SunShine'].iloc[0])
        visb = float(cwb3['Visb'].iloc[0])
        uvi = float(cwb3['UVI'].iloc[0])
        cloud = float(cwb3['Cloud Amount'].iloc[0])
    except:
        print('sp:',cwb1['StnPres'])
        print('te:',cwb1['Temperature'])
        print('rh:',cwb1['RH'])
        print('pr:',cwb1['Precp'])
        print('gl:',cwb2['GloblRad'])
        print('ss:',cwb3['SunShine'])
        print('vi:',cwb3['Visb'])
        print('uv:',cwb3['UVI'])
        print('cl:',cwb3['Cloud'])
        print(id, dt, station1, station2, m, d, h)
    
    return (pres, temp, rh, precp, rad, sun, visb, uvi, cloud)
    

dataset[['pres_cwb', 'temp_cwb', 'rh_cwb', 'precp_cwb', 'rad_cwb', 'sun_cwb', 'visb_cwb', 'uvi_cwb', 'cloud_cwb']] = dataset.progress_apply(lambda x: get_cwb_info(x['LocationCode'], x['DateTime']), axis=1, result_type='expand')


print('新增 pres_cwb, temp_cwb, rh_cwb, precp_cwb, rad_cwb, sun_cwb, visb_cwb, uvi_cwb, cloud_cwb')

dataset

100%|██████████| 96000/96000 [03:20<00:00, 479.11it/s]

新增 pres_cwb, temp_cwb, rh_cwb, precp_cwb, rad_cwb, sun_cwb, visb_cwb, uvi_cwb, cloud_cwb





Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,direction,pres_cwb,temp_cwb,rh_cwb,precp_cwb,rad_cwb,sun_cwb,visb_cwb,uvi_cwb,cloud_cwb
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,181.0,977.2,17.3,92.0,0.0,0.8,0.0,,1.82,
1,1,2024-01-17 09:01:00,,,,,,,23.8994,121.5444,181.0,977.2,17.3,92.0,0.0,0.8,0.0,,1.82,
2,1,2024-01-17 09:02:00,,,,,,,23.8994,121.5444,181.0,977.2,17.3,92.0,0.0,0.8,0.0,,1.82,
3,1,2024-01-17 09:03:00,,,,,,,23.8994,121.5444,181.0,977.2,17.3,92.0,0.0,0.8,0.0,,1.82,
4,1,2024-01-17 09:04:00,,,,,,,23.8994,121.5444,181.0,977.2,17.3,92.0,0.0,0.8,0.0,,1.82,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95995,12,2024-10-02 16:55:00,,,,,,,23.8997,121.5447,298.0,963.0,22.7,97.0,0.0,0.1,0.0,20.0,0.17,10.0
95996,12,2024-10-02 16:56:00,,,,,,,23.8997,121.5447,298.0,963.0,22.7,97.0,0.0,0.1,0.0,20.0,0.17,10.0
95997,12,2024-10-02 16:57:00,,,,,,,23.8997,121.5447,298.0,963.0,22.7,97.0,0.0,0.1,0.0,20.0,0.17,10.0
95998,12,2024-10-02 16:58:00,,,,,,,23.8997,121.5447,298.0,963.0,22.7,97.0,0.0,0.1,0.0,20.0,0.17,10.0


In [11]:
def get_solar_radiation(lat, lon, datetime, temperature=12, pres=1013.25, tz='Asia/Taipei'):
    # 建立位置物件
    location = pvlib.location.Location(
       latitude=lat,
       longitude=lon,
       tz=tz
    )
    
    # 處理時間
    time = pd.Timestamp(datetime, tz=tz)
    times = pd.DatetimeIndex([time])
    
    # 計算太陽位置
    solar_position = location.get_solarposition(
       times,
       temperature=temperature,
       pressure=pres
    )
    
    # 計算晴空輻射
    clearsky = location.get_clearsky(
       times,
       pressure=pres
    )
    
    # 整理結果
    return (
        solar_position['apparent_zenith'].iloc[0],
        solar_position['zenith'].iloc[0],
        solar_position['apparent_elevation'].iloc[0],
        solar_position['elevation'].iloc[0],
        solar_position['azimuth'].iloc[0],
        clearsky['ghi'].iloc[0],
        clearsky['dni'].iloc[0],
        clearsky['dhi'].iloc[0]
    )

dataset[['apparent_zenith', 'zenith', 'apparent_elevation', 'elevation', 'azimuth', 'ghi', 'dni', 'dhi']] = dataset.progress_apply(
    lambda x: get_solar_radiation(x['lat'], x['lon'], x['DateTime'], x['temp_cwb'], x['pres_cwb']), axis=1, result_type='expand')


print('新增 apparent_zenith, zenith, apparent_elevation, elevation, azimuth, ghi, dni, dhi')

dataset

100%|██████████| 96000/96000 [35:00<00:00, 45.71it/s]

新增 apparent_zenith, zenith, apparent_elevation, elevation, azimuth, ghi, dni, dhi





Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,uvi_cwb,cloud_cwb,apparent_zenith,zenith,apparent_elevation,elevation,azimuth,ghi,dni,dhi
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,...,1.82,,63.247668,63.247982,26.752332,26.752018,131.288622,429.514615,785.797366,75.799813
1,1,2024-01-17 09:01:00,,,,,,,23.8994,121.5444,...,1.82,,63.076108,63.076419,26.923892,26.923581,131.466204,432.685234,787.611928,76.049325
2,1,2024-01-17 09:02:00,,,,,,,23.8994,121.5444,...,1.82,,62.905017,62.905327,27.094983,27.094673,131.644626,435.843357,789.402018,76.296758
3,1,2024-01-17 09:03:00,,,,,,,23.8994,121.5444,...,1.82,,62.734401,62.734708,27.265599,27.265292,131.823892,438.988892,791.168037,76.542132
4,1,2024-01-17 09:04:00,,,,,,,23.8994,121.5444,...,1.82,,62.564262,62.564566,27.435738,27.435434,132.004007,442.121752,792.910382,76.785470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95995,12,2024-10-02 16:55:00,,,,,,,23.8997,121.5447,...,0.17,10.0,80.635190,80.636063,9.364810,9.363937,261.543228,80.074206,234.876783,41.854924
95996,12,2024-10-02 16:56:00,,,,,,,23.8997,121.5447,...,0.17,10.0,80.861445,80.862337,9.138555,9.137663,261.649731,76.614636,226.626384,40.621142
95997,12,2024-10-02 16:57:00,,,,,,,23.8997,121.5447,...,0.17,10.0,81.087760,81.088673,8.912240,8.911327,261.756031,73.191495,218.305735,39.371129
95998,12,2024-10-02 16:58:00,,,,,,,23.8997,121.5447,...,0.17,10.0,81.314136,81.315069,8.685864,8.684931,261.862134,69.806874,209.920720,38.105162


In [12]:
dataset

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,uvi_cwb,cloud_cwb,apparent_zenith,zenith,apparent_elevation,elevation,azimuth,ghi,dni,dhi
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,...,1.82,,63.247668,63.247982,26.752332,26.752018,131.288622,429.514615,785.797366,75.799813
1,1,2024-01-17 09:01:00,,,,,,,23.8994,121.5444,...,1.82,,63.076108,63.076419,26.923892,26.923581,131.466204,432.685234,787.611928,76.049325
2,1,2024-01-17 09:02:00,,,,,,,23.8994,121.5444,...,1.82,,62.905017,62.905327,27.094983,27.094673,131.644626,435.843357,789.402018,76.296758
3,1,2024-01-17 09:03:00,,,,,,,23.8994,121.5444,...,1.82,,62.734401,62.734708,27.265599,27.265292,131.823892,438.988892,791.168037,76.542132
4,1,2024-01-17 09:04:00,,,,,,,23.8994,121.5444,...,1.82,,62.564262,62.564566,27.435738,27.435434,132.004007,442.121752,792.910382,76.785470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95995,12,2024-10-02 16:55:00,,,,,,,23.8997,121.5447,...,0.17,10.0,80.635190,80.636063,9.364810,9.363937,261.543228,80.074206,234.876783,41.854924
95996,12,2024-10-02 16:56:00,,,,,,,23.8997,121.5447,...,0.17,10.0,80.861445,80.862337,9.138555,9.137663,261.649731,76.614636,226.626384,40.621142
95997,12,2024-10-02 16:57:00,,,,,,,23.8997,121.5447,...,0.17,10.0,81.087760,81.088673,8.912240,8.911327,261.756031,73.191495,218.305735,39.371129
95998,12,2024-10-02 16:58:00,,,,,,,23.8997,121.5447,...,0.17,10.0,81.314136,81.315069,8.685864,8.684931,261.862134,69.806874,209.920720,38.105162


In [13]:
def get_minutes_of_day(datetime_str):
    datetime_str = str(datetime_str)
    dt = datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S')
    num_of_min = dt.hour * 60 + dt.minute
    
    return num_of_min # 直接回傳是第幾分鐘

dataset['DateTime'] = pd.to_datetime(dataset['DateTime'])
dataset['num_of_min'] = dataset['DateTime'].progress_apply(get_minutes_of_day)

print('計算資料為當天的第幾分鐘 num_of_min')

100%|██████████| 96000/96000 [00:01<00:00, 61385.71it/s]

計算資料為當天的第幾分鐘 num_of_min





In [14]:
dataset['day_of_year'] = dataset['DateTime'].dt.dayofyear

print('計算資料為當年的第幾天 day_of_year')

計算資料為當年的第幾天 day_of_year


In [15]:
def split_time_feature(datetime_str):
    datetime_str = str(datetime_str)
    dt = datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S')
    
    return (dt.month, dt.day, dt.hour, dt.minute)

dataset['DateTime'] = pd.to_datetime(dataset['DateTime'])
dataset[['month', 'day', 'hour', 'min']] = dataset.progress_apply(lambda x: split_time_feature(x['DateTime']), axis=1, result_type='expand')

print('拆分每個時間特徵 month, day, hour, min')

100%|██████████| 96000/96000 [00:04<00:00, 20001.79it/s]

拆分每個時間特徵 month, day, hour, min





In [16]:
dataset['hour_sin'] = np.sin(dataset['hour'] * (2 * np.pi / 24))
dataset['hour_cos'] = np.cos(dataset['hour'] * (2 * np.pi / 24))

print('新增循環時間特徵 hour_sin, hour_cos')

新增循環時間特徵 hour_sin, hour_cos


In [17]:
dataset

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,dni,dhi,num_of_min,day_of_year,month,day,hour,min,hour_sin,hour_cos
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,...,785.797366,75.799813,540,17,1,17,9,0,0.707107,-0.707107
1,1,2024-01-17 09:01:00,,,,,,,23.8994,121.5444,...,787.611928,76.049325,541,17,1,17,9,1,0.707107,-0.707107
2,1,2024-01-17 09:02:00,,,,,,,23.8994,121.5444,...,789.402018,76.296758,542,17,1,17,9,2,0.707107,-0.707107
3,1,2024-01-17 09:03:00,,,,,,,23.8994,121.5444,...,791.168037,76.542132,543,17,1,17,9,3,0.707107,-0.707107
4,1,2024-01-17 09:04:00,,,,,,,23.8994,121.5444,...,792.910382,76.785470,544,17,1,17,9,4,0.707107,-0.707107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95995,12,2024-10-02 16:55:00,,,,,,,23.8997,121.5447,...,234.876783,41.854924,1015,276,10,2,16,55,-0.866025,-0.500000
95996,12,2024-10-02 16:56:00,,,,,,,23.8997,121.5447,...,226.626384,40.621142,1016,276,10,2,16,56,-0.866025,-0.500000
95997,12,2024-10-02 16:57:00,,,,,,,23.8997,121.5447,...,218.305735,39.371129,1017,276,10,2,16,57,-0.866025,-0.500000
95998,12,2024-10-02 16:58:00,,,,,,,23.8997,121.5447,...,209.920720,38.105162,1018,276,10,2,16,58,-0.866025,-0.500000


In [18]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96000 entries, 0 to 95999
Data columns (total 36 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   LocationCode        96000 non-null  int64         
 1   DateTime            96000 non-null  datetime64[ns]
 2   WindSpeed(m/s)      0 non-null      object        
 3   Pressure(hpa)       0 non-null      object        
 4   Temperature(°C)     0 non-null      object        
 5   Humidity(%)         0 non-null      object        
 6   Sunlight(Lux)       0 non-null      object        
 7   Power(mW)           0 non-null      object        
 8   lat                 96000 non-null  float64       
 9   lon                 96000 non-null  float64       
 10  direction           84480 non-null  float64       
 11  pres_cwb            95880 non-null  float64       
 12  temp_cwb            95880 non-null  float64       
 13  rh_cwb              95880 non-null  float64   

In [19]:
dataset.to_csv(f'testdata_1min.csv', index=False)

---

In [20]:
dataset = dataset.iloc[::10].reset_index(drop=True)

dataset

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,dni,dhi,num_of_min,day_of_year,month,day,hour,min,hour_sin,hour_cos
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,...,785.797366,75.799813,540,17,1,17,9,0,0.707107,-0.707107
1,1,2024-01-17 09:10:00,,,,,,,23.8994,121.5444,...,802.888304,78.203925,550,17,1,17,9,10,0.707107,-0.707107
2,1,2024-01-17 09:20:00,,,,,,,23.8994,121.5444,...,817.858602,80.418091,560,17,1,17,9,20,0.707107,-0.707107
3,1,2024-01-17 09:30:00,,,,,,,23.8994,121.5444,...,831.009953,82.458508,570,17,1,17,9,30,0.707107,-0.707107
4,1,2024-01-17 09:40:00,,,,,,,23.8994,121.5444,...,842.586548,84.337764,580,17,1,17,9,40,0.707107,-0.707107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9595,12,2024-10-02 16:10:00,,,,,,,23.8997,121.5447,...,512.933498,82.471167,970,276,10,2,16,10,-0.866025,-0.500000
9596,12,2024-10-02 16:20:00,,,,,,,23.8997,121.5447,...,465.997120,75.510678,980,276,10,2,16,20,-0.866025,-0.500000
9597,12,2024-10-02 16:30:00,,,,,,,23.8997,121.5447,...,411.427721,67.561674,990,276,10,2,16,30,-0.866025,-0.500000
9598,12,2024-10-02 16:40:00,,,,,,,23.8997,121.5447,...,348.005689,58.398848,1000,276,10,2,16,40,-0.866025,-0.500000


In [21]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9600 entries, 0 to 9599
Data columns (total 36 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   LocationCode        9600 non-null   int64         
 1   DateTime            9600 non-null   datetime64[ns]
 2   WindSpeed(m/s)      0 non-null      object        
 3   Pressure(hpa)       0 non-null      object        
 4   Temperature(°C)     0 non-null      object        
 5   Humidity(%)         0 non-null      object        
 6   Sunlight(Lux)       0 non-null      object        
 7   Power(mW)           0 non-null      object        
 8   lat                 9600 non-null   float64       
 9   lon                 9600 non-null   float64       
 10  direction           8448 non-null   float64       
 11  pres_cwb            9588 non-null   float64       
 12  temp_cwb            9588 non-null   float64       
 13  rh_cwb              9588 non-null   float64     

In [22]:
dataset.to_csv(f'testdata.csv', index=False)