In [1]:
import numpy as np
import pandas as pd

from datetime import datetime

---

In [2]:
# True: every selected location is used for training.
only_train = True

# True: keep only the rows recorded between 6:00 and 19:00.
test_range = True

# True: merge the rows into one row per 10 minutes.
merge_10min = False

# True: fill missing values. (The name is a misspelling of "fill_value"; it is
# kept because other cells read the switch under this exact name.)
fill_valud = True

# Location codes whose data is used.
train_code = list(range(1, 18))

In [3]:
# Feature selection: every column named in this list is dropped from the data.
# Entries that are commented out are therefore the features that are kept.
feature_removed = [
    # 'LocationCode',
    'DateTime',
    'WindSpeed(m/s)',
    'Pressure(hpa)',
    'Temperature(°C)',
    'Humidity(%)',
    'Sunlight(Lux)',
    'Power(mW)',
    'lat',
    'lon',
    # 'direction',
    'pres_cwb',
    # 'temp_cwb',
    'rh_cwb',
    'precp_cwb',
    # 'rad_cwb',
    'sun_cwb',
    'visb_cwb',
    'uvi_cwb',
    'cloud_cwb',
    # 'apparent_zenith',
    'zenith',
    # 'apparent_elevation',
    'elevation',
    # 'azimuth',
    # 'ghi',
    # 'dni',
    # 'dhi',
    'num_of_min',
    # 'day_of_year',
    'month',
    'day',
    # 'hour',
    # 'min',
    'hour_sin',
    'hour_cos',
]

In [4]:
def fill_value(data):
    """Fill missing values in the weather and solar columns with 0.

    When the module-level ``fill_valud`` switch is off, ``data`` is returned
    untouched. Otherwise a filled copy is returned; columns that are not in
    the list below (for example ``direction``) keep their missing values.
    """
    if not fill_valud:
        return data

    # Columns whose missing values are replaced by 0.
    zero_filled_columns = (
        'pres_cwb',
        'temp_cwb',
        'rh_cwb',
        'precp_cwb',
        'rad_cwb',
        'sun_cwb',
        'visb_cwb',
        'uvi_cwb',
        'cloud_cwb',

        'apparent_zenith',
        'zenith',
        'apparent_elevation',
        'elevation',
        'azimuth',
        'ghi',
        'dni',
        'dhi',
    )
    return data.fillna(dict.fromkeys(zero_filled_columns, 0))

In [5]:
def remove_feature(data):
    """Split a frame into model inputs and the ``Power(mW)`` target.

    Returns:
        A ``(X, y)`` pair. ``X`` is ``data`` without the columns listed in the
        module-level ``feature_removed``, passed through ``fill_value``.
        ``y`` is the ``Power(mW)`` column selected as a one-column frame.
    """
    target = data.get(['Power(mW)'])
    features = fill_value(data.drop(columns=feature_removed))
    return features, target

---

In [6]:
# Read the training CSVs 1.csv .. 17.csv; file n ends up at dataset[n - 1].
dataset = [
    pd.read_csv(f'/kaggle/input/ai-cup-training-set/{code}.csv')
    for code in range(1, 18)
]

In [7]:
# Preview the first loaded frame (read from 1.csv).
dataset[0]

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,dni,dhi,num_of_min,day_of_year,month,day,hour,min,hour_sin,hour_cos
0,1,2024-01-01 06:31:08,0.0,1016.50,17.5,86.8,25.00,0.0,23.8994,121.5444,...,0.000000,0.000000,391,1,1,1,6,31,1.0,6.123234e-17
1,1,2024-01-01 06:32:08,0.0,1016.53,17.5,86.7,28.33,0.0,23.8994,121.5444,...,0.000000,0.000000,392,1,1,1,6,32,1.0,6.123234e-17
2,1,2024-01-01 06:33:08,0.0,1016.57,17.5,86.7,32.50,0.0,23.8994,121.5444,...,0.000000,0.000000,393,1,1,1,6,33,1.0,6.123234e-17
3,1,2024-01-01 06:34:08,0.0,1016.58,17.5,86.7,39.17,0.0,23.8994,121.5444,...,0.000000,0.000000,394,1,1,1,6,34,1.0,6.123234e-17
4,1,2024-01-01 06:35:08,0.0,1016.59,17.5,86.7,45.83,0.0,23.8994,121.5444,...,0.000000,0.000000,395,1,1,1,6,35,1.0,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101668,1,2024-07-24 18:17:07,0.0,959.80,25.1,100.0,23.33,0.0,23.8994,121.5444,...,24.373787,9.343167,1097,206,7,24,18,17,-1.0,-1.836970e-16
101669,1,2024-07-24 18:18:07,0.0,959.92,25.1,100.0,23.33,0.0,23.8994,121.5444,...,20.844124,8.271200,1098,206,7,24,18,18,-1.0,-1.836970e-16
101670,1,2024-07-24 18:19:07,0.0,960.09,25.1,100.0,33.33,0.0,23.8994,121.5444,...,17.609828,7.252858,1099,206,7,24,18,19,-1.0,-1.836970e-16
101671,1,2024-07-24 18:20:07,0.0,959.40,25.1,100.0,40.00,0.0,23.8994,121.5444,...,14.677345,6.292753,1100,206,7,24,18,20,-1.0,-1.836970e-16


In [8]:
if test_range:
    # Replace every frame by a copy that only holds the rows whose time of day
    # lies between 06:00 and 19:00.
    for pos, frame in enumerate(dataset):
        dataset[pos] = (
            frame
            .assign(DateTime=pd.to_datetime(frame['DateTime']))
            .set_index('DateTime')            # between_time works on a DatetimeIndex
            .between_time('06:00', '19:00')
            .reset_index()                    # turn DateTime back into a column
        )

    print('只使用 06:00 ~ 19:00 的資料進行訓練')

只使用 06:00 ~ 19:00 的資料進行訓練


In [9]:
if merge_10min:
    # Aggregation rules for collapsing the rows into 10-minute bins:
    # measurements are averaged, location attributes keep the bin's first value.
    explicit_agg = {
        'LocationCode': 'first',
        'WindSpeed(m/s)': 'mean',
        'Pressure(hpa)': 'mean',
        'Temperature(°C)': 'mean',
        'Humidity(%)': 'mean',
        'Sunlight(Lux)': 'mean',
        'Power(mW)': 'mean',
        'lat': 'first',
        'lon': 'first',
        'direction': 'first',
        'temp_cwb': 'mean',
        'rh_cwb': 'mean',
        'precp_cwb': 'mean',
        'rad_cwb': 'mean',
        'sun_cwb': 'mean'
    }

    for i in range(len(dataset)):
        df = dataset[i].copy()

        # Make sure DateTime is a datetime column, then use it as the index
        # that resample() groups on.
        df['DateTime'] = pd.to_datetime(df['DateTime'])
        df.set_index('DateTime', inplace=True)

        # resample().agg(mapping) returns only the columns named in the
        # mapping. Previously every column without a rule (pres_cwb, ghi,
        # day_of_year, hour, min, ...) was silently lost, and the later
        # drop(feature_removed) failed with KeyError. Columns without an
        # explicit rule now keep the value of the first row in the bin, which
        # mirrors the 10-minute test set (its rows carry the values of the
        # bin's first minute).
        agg_spec = {col: explicit_agg.get(col, 'first') for col in df.columns}

        # '10min' means one bin per 10 minutes.
        df_resampled = df.resample('10min').agg(agg_spec)

        # Bins that contain no rows (e.g. between one day's last row and the
        # next day's first row) have no LocationCode; drop them.
        df_resampled = df_resampled.dropna(subset=['LocationCode'])

        # Turn DateTime back into a regular column.
        df_resampled.reset_index(inplace=True)

        # 'first' over empty bins made the column float; restore integer codes.
        df_resampled['LocationCode'] = df_resampled['LocationCode'].astype(int).astype('category')

        dataset[i] = df_resampled.round(2)

    print('將資料合併為 10 分鐘一筆')

In [10]:
# Preview the first frame again, after the optional time filter and 10-minute merge.
dataset[0]

Unnamed: 0,DateTime,LocationCode,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,dni,dhi,num_of_min,day_of_year,month,day,hour,min,hour_sin,hour_cos
0,2024-01-01 06:31:08,1,0.0,1016.50,17.5,86.8,25.00,0.0,23.8994,121.5444,...,0.000000,0.000000,391,1,1,1,6,31,1.0,6.123234e-17
1,2024-01-01 06:32:08,1,0.0,1016.53,17.5,86.7,28.33,0.0,23.8994,121.5444,...,0.000000,0.000000,392,1,1,1,6,32,1.0,6.123234e-17
2,2024-01-01 06:33:08,1,0.0,1016.57,17.5,86.7,32.50,0.0,23.8994,121.5444,...,0.000000,0.000000,393,1,1,1,6,33,1.0,6.123234e-17
3,2024-01-01 06:34:08,1,0.0,1016.58,17.5,86.7,39.17,0.0,23.8994,121.5444,...,0.000000,0.000000,394,1,1,1,6,34,1.0,6.123234e-17
4,2024-01-01 06:35:08,1,0.0,1016.59,17.5,86.7,45.83,0.0,23.8994,121.5444,...,0.000000,0.000000,395,1,1,1,6,35,1.0,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96661,2024-07-24 18:17:07,1,0.0,959.80,25.1,100.0,23.33,0.0,23.8994,121.5444,...,24.373787,9.343167,1097,206,7,24,18,17,-1.0,-1.836970e-16
96662,2024-07-24 18:18:07,1,0.0,959.92,25.1,100.0,23.33,0.0,23.8994,121.5444,...,20.844124,8.271200,1098,206,7,24,18,18,-1.0,-1.836970e-16
96663,2024-07-24 18:19:07,1,0.0,960.09,25.1,100.0,33.33,0.0,23.8994,121.5444,...,17.609828,7.252858,1099,206,7,24,18,19,-1.0,-1.836970e-16
96664,2024-07-24 18:20:07,1,0.0,959.40,25.1,100.0,40.00,0.0,23.8994,121.5444,...,14.677345,6.292753,1100,206,7,24,18,20,-1.0,-1.836970e-16


In [11]:
# Stack the frames of the selected location codes (code n is stored at
# dataset[n - 1]) into a single frame with a fresh 0..N-1 index.
selected_frames = [dataset[code - 1] for code in train_code]
train_dataset = pd.concat(selected_frames, ignore_index=True)

In [12]:
print('training set: ', ', '.join(map(str, train_code)))

# The last selected location code always supplies the validation rows.
holdout_code = train_code[-1]

if only_train:
    # Train on every selected location; the validation rows are then also
    # part of the training set.
    print('train (only)')
    fit_codes = train_code
else:
    # Keep the last location out of the training set.
    print('train (not only)')
    fit_codes = train_code[:-1]

train = pd.concat(
    [train_dataset.loc[train_dataset['LocationCode'] == code] for code in fit_codes],
    ignore_index=True,
)

# Bug fix: the former else-branch looped over `train_code[-1]`, which is a
# single int, so it raised "TypeError: 'int' object is not iterable" whenever
# only_train was False. The hold-out code is now selected directly.
valid = pd.concat(
    [train_dataset.loc[train_dataset['LocationCode'] == holdout_code]],
    ignore_index=True,
)

training set:  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
train (only)


In [13]:
# Summarize the training frame: columns, non-null counts and dtypes.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1302366 entries, 0 to 1302365
Data columns (total 36 columns):
 #   Column              Non-Null Count    Dtype         
---  ------              --------------    -----         
 0   DateTime            1302366 non-null  datetime64[ns]
 1   LocationCode        1302366 non-null  int64         
 2   WindSpeed(m/s)      1302366 non-null  float64       
 3   Pressure(hpa)       1302366 non-null  float64       
 4   Temperature(°C)     1302366 non-null  float64       
 5   Humidity(%)         1302366 non-null  float64       
 6   Sunlight(Lux)       1302366 non-null  float64       
 7   Power(mW)           1302366 non-null  float64       
 8   lat                 1302366 non-null  float64       
 9   lon                 1302366 non-null  float64       
 10  direction           1209685 non-null  float64       
 11  pres_cwb            1301886 non-null  float64       
 12  temp_cwb            1301886 non-null  float64       
 13  rh_cwb      

In [14]:
# Summarize the validation frame: columns, non-null counts and dtypes.
valid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92681 entries, 0 to 92680
Data columns (total 36 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   DateTime            92681 non-null  datetime64[ns]
 1   LocationCode        92681 non-null  int64         
 2   WindSpeed(m/s)      92681 non-null  float64       
 3   Pressure(hpa)       92681 non-null  float64       
 4   Temperature(°C)     92681 non-null  float64       
 5   Humidity(%)         92681 non-null  float64       
 6   Sunlight(Lux)       92681 non-null  float64       
 7   Power(mW)           92681 non-null  float64       
 8   lat                 92681 non-null  float64       
 9   lon                 92681 non-null  float64       
 10  direction           0 non-null      float64       
 11  pres_cwb            92681 non-null  float64       
 12  temp_cwb            92681 non-null  float64       
 13  rh_cwb              92681 non-null  float64   

In [15]:
# Split each frame into its feature columns (X) and the Power(mW) target (y).
train_X, train_y = remove_feature(train)
valid_X, valid_y = remove_feature(valid)

In [16]:
# Preview the training features.
train_X

Unnamed: 0,LocationCode,direction,temp_cwb,rad_cwb,apparent_zenith,apparent_elevation,azimuth,ghi,dni,dhi,day_of_year,hour,min
0,1,181.0,14.5,0.0,91.917932,-1.917932,114.450417,0.000000,0.000000,0.000000,1,6,31
1,1,181.0,14.5,0.0,91.709992,-1.709992,114.548622,0.000000,0.000000,0.000000,1,6,32
2,1,181.0,14.5,0.0,91.502216,-1.502216,114.647162,0.000000,0.000000,0.000000,1,6,33
3,1,181.0,14.5,0.0,91.294603,-1.294603,114.746037,0.000000,0.000000,0.000000,1,6,34
4,1,181.0,14.5,0.0,91.087156,-1.087156,114.845251,0.000000,0.000000,0.000000,1,6,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1302361,17,,31.6,1.8,47.554966,42.445034,278.396905,594.551396,677.751829,137.148378,194,15,26
1302362,17,,31.6,1.8,47.784701,42.215299,278.469019,591.250877,676.329372,136.812645,194,15,27
1302363,17,,31.6,1.8,48.010628,41.989372,278.539940,587.994166,674.916117,136.480078,194,15,28
1302364,17,,31.6,1.8,48.236514,41.763486,278.610851,584.727322,673.488707,136.145177,194,15,29


In [17]:
# NOTE(review): this cell repeats the previous one and displays train_X a second time.
train_X

Unnamed: 0,LocationCode,direction,temp_cwb,rad_cwb,apparent_zenith,apparent_elevation,azimuth,ghi,dni,dhi,day_of_year,hour,min
0,1,181.0,14.5,0.0,91.917932,-1.917932,114.450417,0.000000,0.000000,0.000000,1,6,31
1,1,181.0,14.5,0.0,91.709992,-1.709992,114.548622,0.000000,0.000000,0.000000,1,6,32
2,1,181.0,14.5,0.0,91.502216,-1.502216,114.647162,0.000000,0.000000,0.000000,1,6,33
3,1,181.0,14.5,0.0,91.294603,-1.294603,114.746037,0.000000,0.000000,0.000000,1,6,34
4,1,181.0,14.5,0.0,91.087156,-1.087156,114.845251,0.000000,0.000000,0.000000,1,6,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1302361,17,,31.6,1.8,47.554966,42.445034,278.396905,594.551396,677.751829,137.148378,194,15,26
1302362,17,,31.6,1.8,47.784701,42.215299,278.469019,591.250877,676.329372,136.812645,194,15,27
1302363,17,,31.6,1.8,48.010628,41.989372,278.539940,587.994166,674.916117,136.480078,194,15,28
1302364,17,,31.6,1.8,48.236514,41.763486,278.610851,584.727322,673.488707,136.145177,194,15,29


In [18]:
# Preview the training target (Power(mW)).
train_y

Unnamed: 0,Power(mW)
0,0.00
1,0.00
2,0.00
3,0.00
4,0.00
...,...
1302361,35.91
1302362,38.23
1302363,39.95
1302364,38.51


In [19]:
# Check columns, non-null counts and dtypes of the training features.
train_X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1302366 entries, 0 to 1302365
Data columns (total 13 columns):
 #   Column              Non-Null Count    Dtype  
---  ------              --------------    -----  
 0   LocationCode        1302366 non-null  int64  
 1   direction           1209685 non-null  float64
 2   temp_cwb            1302366 non-null  float64
 3   rad_cwb             1302366 non-null  float64
 4   apparent_zenith     1302366 non-null  float64
 5   apparent_elevation  1302366 non-null  float64
 6   azimuth             1302366 non-null  float64
 7   ghi                 1302366 non-null  float64
 8   dni                 1302366 non-null  float64
 9   dhi                 1302366 non-null  float64
 10  day_of_year         1302366 non-null  int64  
 11  hour                1302366 non-null  int64  
 12  min                 1302366 non-null  int64  
dtypes: float64(9), int64(4)
memory usage: 129.2 MB


In [20]:
# Check non-null count and dtype of the training target.
train_y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1302366 entries, 0 to 1302365
Data columns (total 1 columns):
 #   Column     Non-Null Count    Dtype  
---  ------     --------------    -----  
 0   Power(mW)  1302366 non-null  float64
dtypes: float64(1)
memory usage: 9.9 MB


In [21]:
# Check columns, non-null counts and dtypes of the validation features.
valid_X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92681 entries, 0 to 92680
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   LocationCode        92681 non-null  int64  
 1   direction           0 non-null      float64
 2   temp_cwb            92681 non-null  float64
 3   rad_cwb             92681 non-null  float64
 4   apparent_zenith     92681 non-null  float64
 5   apparent_elevation  92681 non-null  float64
 6   azimuth             92681 non-null  float64
 7   ghi                 92681 non-null  float64
 8   dni                 92681 non-null  float64
 9   dhi                 92681 non-null  float64
 10  day_of_year         92681 non-null  int64  
 11  hour                92681 non-null  int64  
 12  min                 92681 non-null  int64  
dtypes: float64(9), int64(4)
memory usage: 9.2 MB


In [22]:
# Check non-null count and dtype of the validation target.
valid_y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92681 entries, 0 to 92680
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Power(mW)  92681 non-null  float64
dtypes: float64(1)
memory usage: 724.2 KB


In [23]:
# Write the training / validation features and targets to CSV files in the
# current working directory, without the index column.
train_X.to_csv('train_X.csv', index=False)
train_y.to_csv('train_y.csv', index=False)

valid_X.to_csv('valid_X.csv', index=False)
valid_y.to_csv('valid_y.csv', index=False)

---

In [24]:
# Load the test set (testdata.csv) and preview it.
testdata = pd.read_csv('/kaggle/input/ai-cup-testset/testdata.csv')

testdata

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,dni,dhi,num_of_min,day_of_year,month,day,hour,min,hour_sin,hour_cos
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,...,785.797366,75.799813,540,17,1,17,9,0,0.707107,-0.707107
1,1,2024-01-17 09:10:00,,,,,,,23.8994,121.5444,...,802.888304,78.203925,550,17,1,17,9,10,0.707107,-0.707107
2,1,2024-01-17 09:20:00,,,,,,,23.8994,121.5444,...,817.858602,80.418091,560,17,1,17,9,20,0.707107,-0.707107
3,1,2024-01-17 09:30:00,,,,,,,23.8994,121.5444,...,831.009953,82.458508,570,17,1,17,9,30,0.707107,-0.707107
4,1,2024-01-17 09:40:00,,,,,,,23.8994,121.5444,...,842.586548,84.337764,580,17,1,17,9,40,0.707107,-0.707107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9595,12,2024-10-02 16:10:00,,,,,,,23.8997,121.5447,...,512.933498,82.471167,970,276,10,2,16,10,-0.866025,-0.500000
9596,12,2024-10-02 16:20:00,,,,,,,23.8997,121.5447,...,465.997120,75.510678,980,276,10,2,16,20,-0.866025,-0.500000
9597,12,2024-10-02 16:30:00,,,,,,,23.8997,121.5447,...,411.427721,67.561674,990,276,10,2,16,30,-0.866025,-0.500000
9598,12,2024-10-02 16:40:00,,,,,,,23.8997,121.5447,...,348.005689,58.398848,1000,276,10,2,16,40,-0.866025,-0.500000


In [25]:
# Drop the columns listed in feature_removed, then run the result through
# fill_value. NOTE: the cell overwrites `testdata`, so it can only be run once
# per load of the raw file.
testdata = fill_value(testdata.drop(columns=feature_removed))

testdata

Unnamed: 0,LocationCode,direction,temp_cwb,rad_cwb,apparent_zenith,apparent_elevation,azimuth,ghi,dni,dhi,day_of_year,hour,min
0,1,181.0,17.3,0.8,63.247668,26.752332,131.288622,429.514615,785.797366,75.799813,17,9,0
1,1,181.0,17.3,0.8,61.553679,28.446321,133.102785,460.647960,802.888304,78.203925,17,9,10
2,1,181.0,17.3,0.8,59.910304,30.089696,135.005108,490.455774,817.858602,80.418091,17,9,20
3,1,181.0,17.3,0.8,58.321862,31.678138,137.000168,518.860931,831.009953,82.458508,17,9,30
4,1,181.0,17.3,0.8,56.793003,33.206997,139.092322,545.793325,842.586548,84.337764,17,9,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9595,12,298.0,22.7,0.1,70.532843,19.467157,256.478029,253.414833,512.933498,82.471167,276,16,10
9596,12,298.0,22.7,0.1,72.762170,17.237830,257.658458,213.603810,465.997120,75.510678,276,16,20
9597,12,298.0,22.7,0.1,75.001377,14.998623,258.803926,174.037599,411.427721,67.561674,276,16,30
9598,12,298.0,22.7,0.1,77.249297,12.750703,259.918907,135.207120,348.005689,58.398848,276,16,40


In [26]:
# Check columns, non-null counts and dtypes of the prepared test set.
testdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9600 entries, 0 to 9599
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   LocationCode        9600 non-null   int64  
 1   direction           8448 non-null   float64
 2   temp_cwb            9600 non-null   float64
 3   rad_cwb             9600 non-null   float64
 4   apparent_zenith     9600 non-null   float64
 5   apparent_elevation  9600 non-null   float64
 6   azimuth             9600 non-null   float64
 7   ghi                 9600 non-null   float64
 8   dni                 9600 non-null   float64
 9   dhi                 9600 non-null   float64
 10  day_of_year         9600 non-null   int64  
 11  hour                9600 non-null   int64  
 12  min                 9600 non-null   int64  
dtypes: float64(9), int64(4)
memory usage: 975.1 KB


In [27]:
# Write the prepared test set to testdata.csv in the working directory, without the index column.
testdata.to_csv('testdata.csv', index=False)

---

In [28]:
# Load the test set from testdata_1min.csv and preview it.
testdata_1min = pd.read_csv('/kaggle/input/ai-cup-testset/testdata_1min.csv')

testdata_1min

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),lat,lon,...,dni,dhi,num_of_min,day_of_year,month,day,hour,min,hour_sin,hour_cos
0,1,2024-01-17 09:00:00,,,,,,,23.8994,121.5444,...,785.797366,75.799813,540,17,1,17,9,0,0.707107,-0.707107
1,1,2024-01-17 09:01:00,,,,,,,23.8994,121.5444,...,787.611928,76.049325,541,17,1,17,9,1,0.707107,-0.707107
2,1,2024-01-17 09:02:00,,,,,,,23.8994,121.5444,...,789.402018,76.296758,542,17,1,17,9,2,0.707107,-0.707107
3,1,2024-01-17 09:03:00,,,,,,,23.8994,121.5444,...,791.168037,76.542132,543,17,1,17,9,3,0.707107,-0.707107
4,1,2024-01-17 09:04:00,,,,,,,23.8994,121.5444,...,792.910382,76.785470,544,17,1,17,9,4,0.707107,-0.707107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95995,12,2024-10-02 16:55:00,,,,,,,23.8997,121.5447,...,234.876783,41.854924,1015,276,10,2,16,55,-0.866025,-0.500000
95996,12,2024-10-02 16:56:00,,,,,,,23.8997,121.5447,...,226.626384,40.621142,1016,276,10,2,16,56,-0.866025,-0.500000
95997,12,2024-10-02 16:57:00,,,,,,,23.8997,121.5447,...,218.305735,39.371129,1017,276,10,2,16,57,-0.866025,-0.500000
95998,12,2024-10-02 16:58:00,,,,,,,23.8997,121.5447,...,209.920720,38.105162,1018,276,10,2,16,58,-0.866025,-0.500000


In [29]:
# Drop the columns listed in feature_removed, then run the result through
# fill_value. NOTE: the cell overwrites `testdata_1min`, so it can only be run
# once per load of the raw file.
testdata_1min = fill_value(testdata_1min.drop(columns=feature_removed))

testdata_1min

Unnamed: 0,LocationCode,direction,temp_cwb,rad_cwb,apparent_zenith,apparent_elevation,azimuth,ghi,dni,dhi,day_of_year,hour,min
0,1,181.0,17.3,0.8,63.247668,26.752332,131.288622,429.514615,785.797366,75.799813,17,9,0
1,1,181.0,17.3,0.8,63.076108,26.923892,131.466204,432.685234,787.611928,76.049325,17,9,1
2,1,181.0,17.3,0.8,62.905017,27.094983,131.644626,435.843357,789.402018,76.296758,17,9,2
3,1,181.0,17.3,0.8,62.734401,27.265599,131.823892,438.988892,791.168037,76.542132,17,9,3
4,1,181.0,17.3,0.8,62.564262,27.435738,132.004007,442.121752,792.910382,76.785470,17,9,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95995,12,298.0,22.7,0.1,80.635190,9.364810,261.543228,80.074206,234.876783,41.854924,276,16,55
95996,12,298.0,22.7,0.1,80.861445,9.138555,261.649731,76.614636,226.626384,40.621142,276,16,56
95997,12,298.0,22.7,0.1,81.087760,8.912240,261.756031,73.191495,218.305735,39.371129,276,16,57
95998,12,298.0,22.7,0.1,81.314136,8.685864,261.862134,69.806874,209.920720,38.105162,276,16,58


In [30]:
# Check columns, non-null counts and dtypes of the prepared 1-minute test set.
testdata_1min.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96000 entries, 0 to 95999
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   LocationCode        96000 non-null  int64  
 1   direction           84480 non-null  float64
 2   temp_cwb            96000 non-null  float64
 3   rad_cwb             96000 non-null  float64
 4   apparent_zenith     96000 non-null  float64
 5   apparent_elevation  96000 non-null  float64
 6   azimuth             96000 non-null  float64
 7   ghi                 96000 non-null  float64
 8   dni                 96000 non-null  float64
 9   dhi                 96000 non-null  float64
 10  day_of_year         96000 non-null  int64  
 11  hour                96000 non-null  int64  
 12  min                 96000 non-null  int64  
dtypes: float64(9), int64(4)
memory usage: 9.5 MB


In [31]:
# Write the prepared 1-minute test set to testdata_1min.csv in the working directory, without the index column.
testdata_1min.to_csv('testdata_1min.csv', index=False)