In [1]:
import os
import random
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import r2_score

In [29]:
class MYDataset(Dataset):
    """Dataset view over a DataFrame with a fixed positional column layout.

    Column 0 holds the label, columns 1-4 the coefficient inputs and
    columns 5 onward the remaining per-sample features (exposed as ``images``).
    """

    def __init__(self, df):
        """Slice ``df`` by column position into label, coefficient and feature arrays."""
        self.df = df
        self.images = df.iloc[:, 5:].values
        self.coef = df.iloc[:, 1:5].values
        self.labels = df.iloc[:, 0].values

    def __len__(self):
        """Return the number of samples (rows of the feature array)."""
        return self.images.shape[0]

    @staticmethod
    def _as_float(array_like):
        """Copy ``array_like`` into a new float32 tensor."""
        return torch.tensor(array_like, dtype=torch.float)

    def __getitem__(self, idx):
        """Return ``(image, coef, label)`` for row ``idx`` as float32 tensors."""
        to_tensor = self._as_float
        return (
            to_tensor(self.images[idx]),
            to_tensor(self.coef[idx]),
            to_tensor(self.labels[idx]),
        )

In [30]:
# Split configuration, kept in one place so it is easy to tune or repoint.
DATA_PATH = "D://CO2_data4.csv"
TRAIN_FRACTION = 0.8
SPLIT_SEED = 42  # fixed seed so the train/test split is reproducible across runs

dataset = pd.read_csv(DATA_PATH, encoding="utf-8")
n_rows = dataset.shape[0]

# Draw the training row positions without replacement. A private RNG is used
# so the global `random` state is left untouched.
train_li = sorted(
    random.Random(SPLIT_SEED).sample(range(n_rows), int(TRAIN_FRACTION * n_rows))
)

# Every row position that was not drawn for training belongs to the test set.
# A set-membership test stays valid when the trailing rows are all test rows,
# which walking a cursor through the sorted training list does not
# (the cursor runs off the end of the list).
train_lookup = set(train_li)
test_li = [row for row in range(n_rows) if row not in train_lookup]

# Sanity check: the two index lists partition the data set.
assert len(train_li) + len(test_li) == n_rows

train_set = dataset.iloc[train_li, :]
test_set = dataset.iloc[test_li, :]

In [31]:
def compute_distances(P, C):
    """Return the pairwise Euclidean distances between two point sets.

    Uses the expansion ``|p - c|^2 = |p|^2 + |c|^2 - 2 p.c`` so that the whole
    matrix is computed with one matrix product.

    Args:
        P: array-like of shape ``(n, d)``, one point per row.
        C: array-like of shape ``(m, d)``, one point per row.

    Returns:
        ``numpy.ndarray`` of shape ``(n, m)`` whose entry ``[i, j]`` is the
        distance between ``P[i]`` and ``C[j]``.
    """
    P = np.asarray(P)
    C = np.asarray(C)

    A = (P**2).sum(axis=1, keepdims=True)    # (n, 1) squared norms of P rows
    B = (C**2).sum(axis=1, keepdims=True).T  # (1, m) squared norms of C rows

    squared = A + B - 2 * np.dot(P, C.T)
    # Floating-point cancellation can leave a tiny negative value for
    # (near-)coincident points; clamp at zero so sqrt never yields NaN.
    return np.sqrt(np.maximum(squared, 0))

In [47]:
# Build the training frame in the positional layout MYDataset expects:
#   column 0     -> label (fCO2)
#   columns 1-4  -> constant term 'beta', Chl, Temp, Salt
#   columns 5-8  -> distances from each sample to the four reference corners

# Positional index so the join with the distance frame aligns row by row.
dataset = train_set.reset_index(drop=True)
ycor = dataset.lat
#ycor = dataset.lon
label = dataset.fCO2

train_df = pd.DataFrame({
    'label': label,
    # Constant column sized from the data so any split size works.
    'beta': np.ones(len(dataset)),
    'Chl': dataset.Chl,
    'Temp': dataset.Temp,
    'Salt': dataset.Salt,
})

# Shift negative longitudes by 360 so all x coordinates are non-negative.
lon = dataset.lon.to_numpy()
xcor = np.where(lon < 0, lon + 360, lon)

cor_df = pd.DataFrame()
cor_df['xcor'] = xcor
cor_df['ycor'] = ycor

# Reference corners as (x, y) pairs in the same shifted coordinate system.
REFERENCE_CORNERS = np.array(
    [[110.0, 0.0], [290.0, 0.0], [110.0, 70.0], [290.0, 70.0]]
)

cor_li = cor_df.to_numpy()
dis_li = compute_distances(cor_li, REFERENCE_CORNERS)
dis_df = pd.DataFrame(dis_li)
assert len(dis_df) == len(train_df), "distance rows must match sample rows"
train_df = train_df.join(dis_df)

train_data = MYDataset(train_df)
#test_data = MYDataset(test_df)
train_loader = DataLoader(train_data, batch_size=50, shuffle=True, num_workers=0, drop_last=True)
#test_loader = DataLoader(test_data, batch_size=64, shuffle=False, num_workers=0)

In [48]:

# Display the frame currently bound to `dataset` (rich notebook output).
dataset

Unnamed: 0,date,lon,lat,fCO2,Chl,Temp,Salt
0,1998/7/16,-110.25,22.25,1.429020,0.932204,1.035501,3.714037
1,1998/7/16,-111.75,22.25,0.653314,0.865872,0.980157,3.665900
2,1998/7/16,-111.25,23.25,1.835711,0.874641,0.896934,3.617013
3,1998/7/16,-112.25,23.25,1.925815,0.861615,0.827733,3.544569
4,1998/7/16,-113.25,25.25,1.074433,0.922670,0.545926,3.229704
...,...,...,...,...,...,...,...
5278,2020/7/16,137.75,13.25,1.178217,0.770692,1.481354,-0.398306
5279,2020/7/16,137.25,12.75,1.116893,0.768039,1.489461,-0.467678
5280,2020/7/16,137.75,8.75,0.849520,0.791038,1.508860,-0.471190
5281,2020/7/16,137.25,14.25,1.219482,0.770857,1.481271,-0.520788


In [49]:
class GNNWR(nn.Module):
    """Fully connected network mapping ``insize`` features to ``outsize`` outputs.

    Layout (module names are kept stable for ``state_dict`` compatibility):

    * ``full1``: ``Linear(insize, 600)`` with no normalisation or activation
      after it.
    * ``full2`` .. ``full{k}``: repeated
      ``Linear -> BatchNorm1d -> PReLU(init=0.4) -> Dropout(0.2)`` stages.
      Each stage's width is half of the largest power of two not exceeding
      the previous width; stages are added while that power of two is at
      least ``max(128, outsize + 1)``.
    * ``full{k+1}``: ``Linear(last_width, outsize)``.

    Args:
        insize: number of input features per sample.
        outsize: number of outputs per sample.
    """

    def __init__(self, insize, outsize):
        super(GNNWR, self).__init__()
        self.insize = insize
        self.outsize = outsize

        self.fc = nn.Sequential()

        # The input width comes from `insize` so the constructor argument
        # actually controls the first layer.
        self.fc.add_module("full" + str(1), nn.Linear(self.insize, 600))

        lastsize = 600
        i = 2  # index used in the names of the next stage's modules
        while math.pow(2, int(math.log2(lastsize))) >= max(128, outsize + 1):
            # Half of the largest power of two <= lastsize (600 -> 256 -> 128 -> 64).
            thissize = int(math.pow(2, int(math.log2(lastsize)) - 1))

            self.fc.add_module("full" + str(i), nn.Linear(lastsize, thissize))
            self.fc.add_module("batc" + str(i), nn.BatchNorm1d(thissize))
            self.fc.add_module("acti" + str(i), nn.PReLU(init=0.4))
            self.fc.add_module("drop" + str(i), nn.Dropout(0.2))

            lastsize = thissize
            i = i + 1

        self.fc.add_module("full" + str(i), nn.Linear(lastsize, outsize))

    def forward(self, x):
        """Apply the layer stack to ``x`` of shape ``(batch, insize)``."""
        return self.fc(x)

# Four inputs (one distance per reference corner) and four outputs
# (one multiplier per regression term).
model = GNNWR(4, 4)
# `reduction='mean'` is the supported spelling of the deprecated `reduce=True`.
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=0.1)



In [50]:
# Most recent R2 score; train() overwrites it through `global r2`.
r2 = 0

# One row of four fixed weights, i.e. the (1, 4) weight matrix of `out`.
weightlist = [[-0.172075, -0.175203, 0.294790, 0.385374]]

# Bias-free linear layer whose weights are frozen (requires_grad=False),
# so it is never updated by the optimizer.
out = nn.Linear(4, 1, bias=False)
out.weight = nn.Parameter(torch.tensor(weightlist), requires_grad=False)

def train(epoch):
    """Run one training epoch over ``train_loader`` and print a summary line.

    For every batch the network output is multiplied element-wise by the
    batch's ``coef`` tensor and passed through the fixed linear layer ``out``
    to obtain the prediction that is compared with the label.

    On epochs divisible by 100 the global ``r2`` is refreshed with the R2
    score of all predictions made during the epoch; on other epochs the
    previously stored value is printed unchanged.

    Args:
        epoch: 1-based epoch number, used for reporting and to decide
            whether R2 is recomputed.

    Returns:
        The mean training loss over the samples processed in this epoch
        (``nan`` if the loader yielded no batches).
    """
    global r2
    model.train()

    report_r2 = epoch % 100 == 0
    loss_sum = 0.0
    n_seen = 0
    preds, targets = [], []

    for data, coef, label in train_loader:
        batch = data.size(0)  # actual batch size; no hard-coded constant
        data = data.view(batch, -1)
        label = label.view(batch, -1)
        optimizer.zero_grad()

        output = model(data)
        output = output.mul(coef)
        output = out(output)

        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * batch
        n_seen += batch
        if report_r2:
            preds.append(output.detach().view(-1))
            targets.append(label.view(-1))

    # Average over the samples actually processed; with drop_last=True this
    # can be fewer than len(train_loader.dataset).
    train_loss = loss_sum / n_seen if n_seen else float("nan")

    if report_r2 and preds:
        # r2_score expects (y_true, y_pred); the metric is not symmetric.
        r2 = r2_score(torch.cat(targets).numpy(), torch.cat(preds).numpy())

    print('Epoch: {} \tTraining Loss: {:.6f} \tR2: {:.6f}'.format(epoch, train_loss, r2))
    return train_loss

In [51]:
# Total number of passes over the training data; epochs are numbered from 1.
N_EPOCHS = 200000
for epoch in range(1, N_EPOCHS + 1):
    train(epoch)


Epoch: 1 	Training Loss: 0.991468 	R2: 0.000000
Epoch: 2 	Training Loss: 0.881064 	R2: 0.000000
Epoch: 3 	Training Loss: 0.868819 	R2: 0.000000
Epoch: 4 	Training Loss: 0.846512 	R2: 0.000000
Epoch: 5 	Training Loss: 0.845521 	R2: 0.000000
Epoch: 6 	Training Loss: 0.834410 	R2: 0.000000
Epoch: 7 	Training Loss: 0.816901 	R2: 0.000000
Epoch: 8 	Training Loss: 0.814040 	R2: 0.000000
Epoch: 9 	Training Loss: 0.812420 	R2: 0.000000
Epoch: 10 	Training Loss: 0.828404 	R2: 0.000000
Epoch: 11 	Training Loss: 0.803942 	R2: 0.000000
Epoch: 12 	Training Loss: 0.796138 	R2: 0.000000
Epoch: 13 	Training Loss: 0.789802 	R2: 0.000000
Epoch: 14 	Training Loss: 0.789559 	R2: 0.000000
Epoch: 15 	Training Loss: 0.792491 	R2: 0.000000
Epoch: 16 	Training Loss: 0.784210 	R2: 0.000000
Epoch: 17 	Training Loss: 0.777894 	R2: 0.000000
Epoch: 18 	Training Loss: 0.773159 	R2: 0.000000
Epoch: 19 	Training Loss: 0.764474 	R2: 0.000000
Epoch: 20 	Training Loss: 0.763126 	R2: 0.000000
Epoch: 21 	Training Loss: 0.7

KeyboardInterrupt: 