In [1]:
# Quantum
import pennylane as qml
# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Numpy, Pandas
import numpy as np
import pandas as pd
# Layer
from kan import KAN
from RNN_block import RNN_block
# Data processing
from fucntions import data_seq, train_seq
from sklearn.preprocessing import MinMaxScaler

In [None]:
class data_pre:
    def __init__(self, filename = "./weatherAUS.csv", chk = False):
        self.df = pd.read_csv(filename)
        
        df = self.df
        locations = df['Location'].unique()
        self.location_dfs = {location: df[df['Location'] == location].reset_index(drop=False) for location in locations}
        
        if chk:
            print(self.df.keys())
    
    def make_data_with_keys(self, key_features, key_label):
        train_data_dict = dict()
        label_data_dict = dict()
        for e in self.location_dfs:
            train_data_dict[e] = self.location_dfs[e][key_features]
            for key in key_features:
                mean_value = train_data_dict[e][key].mean(axis = 0)
                train_data_dict[e][key].fillna(mean_value, inplace = True)

            label_data_dict[e] = self.location_dfs[e][[key_label]]
            label_data_dict[e].loc[label_data_dict[e]['RainTomorrow'] == 'Yes', 'RainTomorrow'] = 1
            label_data_dict[e].loc[label_data_dict[e]['RainTomorrow'] == 'No', 'RainTomorrow'] = -1
            label_data_dict[e].fillna(0, inplace = True)
        return train_data_dict, label_data_dict

    def chk_distribution(self, location = "all"):
        location_dict = self.location_dfs
        for e in location_dict:
            if location == "all" or e == location:
                df = location_dict[e]
                print(f"\n\n======== {e} ========")
                print('==== MEAN ====')
                print(df[['MinTemp', 'MaxTemp']].mean())
                print('==== STD ====')
                print(df[['MinTemp', 'MaxTemp']].std())

    def split_about_location(self):
        '''
            location마다의 dataframe을 딕셔너리로 만들어 return
        '''
        df = self.df
        locations = df['Location'].unique()
        location_dfs = {location: df[df['Location'] == location].reset_index(drop=True) for location in locations}
        return location_dfs
    
        

In [3]:
data = data_pre(chk = False)
train_data_dict, label_data_dict = data.make_data_with_keys(['MinTemp', 'MaxTemp', 'Rainfall', 'Humidity3pm', 'Pressure3pm'], 'RainTomorrow')

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_data_dict[e][key].fillna(mean_value, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data_dict[e][key].fillna(mean_value, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label_data_dict[e].loc[label_data_dict[e]['RainTomorrow'] == 'Yes', 'RainTomorrow'

In [6]:
for e in train_data_dict:
    print(e)
    train_data_dict[e].to_csv("./data/train_data_{}.csv".format(e), header = True, index = False)
    label_data_dict[e].to_csv("./data/label_data_{}.csv".format(e), header = True, index = False)

Albury
BadgerysCreek
Cobar
CoffsHarbour
Moree
Newcastle
NorahHead
NorfolkIsland
Penrith
Richmond
Sydney
SydneyAirport
WaggaWagga
Williamtown
Wollongong
Canberra
Tuggeranong
MountGinini
Ballarat
Bendigo
Sale
MelbourneAirport
Melbourne
Mildura
Nhil
Portland
Watsonia
Dartmoor
Brisbane
Cairns
GoldCoast
Townsville
Adelaide
MountGambier
Nuriootpa
Woomera
Albany
Witchcliffe
PearceRAAF
PerthAirport
Perth
SalmonGums
Walpole
Hobart
Launceston
AliceSprings
Darwin
Katherine
Uluru
