In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, TensorDataset
import os
import argparse

from torchvision import datasets, transforms
from pytorch_model_summary import summary

import numpy as np
import os
from dataloader import Syn_data, data_processing
from model import LinearRegression
from loss import loss_fn

# Expose only GPU 0 to this process before any CUDA work is done.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  
# `device` is a plain string: 'cuda:0' when CUDA is available, otherwise 'cpu'.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [None]:
# Load the ride records from hour.csv and preview the first rows.
# pandas is imported here because no earlier cell of the notebook imports it,
# so `pd` would otherwise be undefined on a fresh kernel.
import pandas as pd

rides = pd.read_csv('hour.csv')
rides.head()

In [None]:
# Pre-process the raw rides table with the project helper `data_processing`.
# Judging only by how the results are used further down (TODO confirm against
# the dataloader module): `features` / `targets` are row-indexable arrays,
# `data_process` is a 2-D array whose columns 8-11 flag the season, and
# `scale_feature` maps a column name to its (mean, std) scaling pair.
features, targets, data_process, scale_feature = data_processing(rides)

In [None]:
# Split the samples by season: seasons 1-3 form the training set and season 4
# is held out for testing. Columns 8..11 of `data_process` are used as the
# per-season flags; a row belongs to a season when its flag equals 1.

def _select_season(flag_column):
    """Return (features, targets) restricted to rows whose flag column is 1."""
    rows = np.where(data_process[:, flag_column] == 1)
    return features[rows], targets[rows]


season1, season1_target = _select_season(8)
season2, season2_target = _select_season(9)
season3, season3_target = _select_season(10)
season4, season4_target = _select_season(11)

# Training set: seasons 1-3 stacked in order; targets get a trailing axis so
# they line up with the model output.
x_train = np.float32(np.vstack((season1, season2, season3)))
y_train = np.float32(
    np.expand_dims(
        np.hstack((season1_target, season2_target, season3_target)),
        axis=-1,
    )
)

# Test set: season 4 only.
x_test = np.float32(season4)
y_test = np.float32(np.expand_dims(season4_target, axis=-1))

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

In [None]:
# Wrap both splits in the project's Syn_data dataset (which receives the noise
# settings) and batch them. The noise_type string is forwarded verbatim; its
# spelling presumably has to match what Syn_data checks for — verify there.
noise_kwargs = dict(noise_type='Expoential', noise_level=1)

train_data = Syn_data(x_train, y_train, **noise_kwargs)
train_loader = DataLoader(
    train_data,
    batch_size=32,
    shuffle=True,
    drop_last=True,
)

test_data = Syn_data(x_test, y_test, **noise_kwargs)
test_loader = DataLoader(
    test_data,
    batch_size=32,
    shuffle=False,
    drop_last=True,
)

In [None]:
# Build the regression model on the selected device, plus its Adam optimizer.
model = LinearRegression(input_shape=x_train.shape[1])
model = model.to(device)
# `device` is 'cuda:0' or 'cpu', so the former `device == 'cuda'` comparison
# never matched and the GPU-only setup was silently skipped. Match on the
# prefix instead.
if str(device).startswith('cuda'):
    # The model is intentionally not wrapped in DataParallel: only one GPU is
    # exposed through CUDA_VISIBLE_DEVICES, and wrapping would prefix every
    # state_dict key with 'module.', changing the saved checkpoint layout.
    cudnn.benchmark = True
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
def train(epoch, loss_type):
    """Run one optimisation pass over ``train_loader``.

    Relies on the notebook-level ``model``, ``optimizer``, ``train_loader``,
    ``device``, ``loss_fn`` and ``num_epochs``.

    Args:
        epoch: Zero-based epoch index; only used for the progress banner.
        loss_type: Name of the training objective; converted with ``str`` and
            forwarded to ``loss_fn``.

    Returns:
        The mean of the per-batch training losses for this epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    print('\nEpoch [{}/{}]'.format(epoch+1, num_epochs))
    model.train()
    train_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)

        loss = loss_fn(inputs, outputs, targets, str(loss_type))
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        num_batches += 1

    # Previously an empty loader surfaced as an unbound `batch_idx`; fail with
    # a message that names the actual problem instead.
    if num_batches == 0:
        raise ValueError('train_loader produced no batches')

    mean_loss = train_loss / num_batches
    # Report the number of batches processed (the old code printed the last
    # zero-based index, i.e. one less than the real count).
    print('Step [{}/{}], Loss: {:.4f}'
          .format(num_batches, len(train_loader), mean_loss))
    return mean_loss
       

def calculate_bias(model):
    """Average the 'bias' loss of ``model`` over the batches of ``train_loader``.

    The model is put in eval mode and gradients are disabled. For every batch
    the value ``loss_fn(inputs, outputs, targets, 'bias')`` is accumulated, and
    the mean over all batches is returned.
    """
    model.eval()
    running_total = 0
    with torch.no_grad():
        for step, (batch_x, batch_y) in enumerate(train_loader):
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            predictions = model(batch_x)
            batch_value = loss_fn(batch_x, predictions, batch_y, 'bias')
            running_total += batch_value.item()
        # `step` is the zero-based index of the last batch.
        mean_value = running_total / (step + 1)
    return mean_value

    
    
    
def test(epoch):
    """Evaluate ``model`` on ``test_loader`` with the 'mse' loss.

    Relies on the notebook-level ``model``, ``test_loader``, ``device``,
    ``loss_fn`` and the global ``best_loss``. When the mean loss improves on
    ``best_loss`` the model weights, the loss and the epoch are saved to
    ``./checkpoint/bike_ckpt_MAE_ep.pth`` and ``best_loss`` is updated.

    Args:
        epoch: Epoch index recorded in the checkpoint.

    Returns:
        The mean of the per-batch test losses.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    global best_loss
    model.eval()
    test_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = loss_fn(inputs, outputs, targets, 'mse')

            test_loss += loss.item()
            num_batches += 1

    # An empty loader used to surface as an unbound `batch_idx`.
    if num_batches == 0:
        raise ValueError('test_loader produced no batches')

    loss_t = test_loss / num_batches
    # Print the number of batches processed rather than the last index.
    print('Step [{}/{}], Loss: {:.4f}'
          .format(num_batches, len(test_loader), loss_t))

    if loss_t < best_loss:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'test_loss': loss_t,
            'epoch': epoch,
        }
        # exist_ok=True replaces the isdir()/mkdir() pair and cannot fail when
        # the directory appears between the check and the creation.
        os.makedirs('checkpoint', exist_ok=True)
        torch.save(state, './checkpoint/bike_ckpt_MAE_ep.pth')
        best_loss = loss_t
    return loss_t
        
        
def test_with_bias(model, epoch, bias, loss_type):
    """Evaluate ``model`` on ``test_loader`` after adding ``bias`` to its outputs.

    Relies on the notebook-level ``test_loader``, ``device``, ``loss_fn`` and
    the global ``best_loss``. The evaluation metric is always the 'mse' loss;
    ``loss_type`` only selects the checkpoint file name. When the mean loss
    improves on ``best_loss`` the weights, loss, bias and epoch are saved to
    ``./checkpoint/bike_ckpt_<loss_type>.pth`` and ``best_loss`` is updated.

    Args:
        model: Network to evaluate.
        epoch: Epoch index recorded in the checkpoint.
        bias: Offset added to every model output before the loss is computed.
        loss_type: Training-objective name used in the checkpoint file name.

    Returns:
        The mean of the per-batch test losses.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    global best_loss
    model.eval()
    test_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            outputs = outputs + bias
            loss = loss_fn(inputs, outputs, targets, 'mse')

            test_loss += loss.item()
            num_batches += 1

    # An empty loader used to surface as an unbound `batch_idx`.
    if num_batches == 0:
        raise ValueError('test_loader produced no batches')

    loss_t = test_loss / num_batches
    # Print the number of batches processed rather than the last index.
    print('Step [{}/{}], Loss: {:.4f}'
          .format(num_batches, len(test_loader), loss_t))

    if loss_t < best_loss:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'test_loss': loss_t,
            'bias': bias,
            'epoch': epoch,
        }
        # exist_ok=True replaces the isdir()/mkdir() pair and cannot fail when
        # the directory appears between the check and the creation.
        os.makedirs('checkpoint', exist_ok=True)
        torch.save(state, './checkpoint/bike_ckpt_'+str(loss_type)+'.pth')
        best_loss = loss_t
    return loss_t


In [None]:
import time

# Start from +inf so the first evaluated epoch always writes a checkpoint.
# With the previous fixed threshold of 10, a run whose test MSE never dropped
# below 10 saved nothing and the later torch.load of the checkpoint failed.
best_loss = float('inf')
num_epochs = 100
batch_size = 32  # kept for reference; the loaders above hard-code the same value
train_MI = []        # mean training loss per epoch
test_mse_Loss = []   # mean test MSE (with bias correction) per epoch
loss_type = 'ours'
for epoch in range(num_epochs):
    start_time = time.time()
    train_mi = train(epoch, loss_type)
    # Estimate the output offset on the training data, then evaluate with it.
    bias = calculate_bias(model)
    test_mse_loss = test_with_bias(model, epoch, bias, loss_type)
    # Bias-free alternative: test_mse_loss = test(epoch)

    # Wall-clock seconds spent on this epoch.
    print(time.time() - start_time)

    train_MI.append(train_mi)
    test_mse_Loss.append(test_mse_loss)

In [None]:
# Rebuild a fresh model and restore the best weights written during training.
net = LinearRegression(input_shape=x_train.shape[1])
net = net.to(device)
# `device` is 'cuda:0' or 'cpu', so the old `device == 'cuda'` test never
# matched; that dead branch also wrapped the training `model` instead of `net`.
# The wrap is dropped (a single GPU is exposed) and only the cuDNN flag is kept.
if str(device).startswith('cuda'):
    cudnn.benchmark = True
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only host.
checkpoint = torch.load('./checkpoint/bike_ckpt_'+str(loss_type)+'.pth',
                        map_location=device)
# A checkpoint saved from a DataParallel-wrapped model prefixes every key with
# 'module.'; strip it so the plain model can load either layout.
state_dict = {
    (key[len('module.'):] if key.startswith('module.') else key): value
    for key, value in checkpoint['net'].items()
}
net.load_state_dict(state_dict)

In [None]:
def predict(model, data_loader):
    """Run ``model`` over every batch of ``data_loader`` and collect the outputs.

    Inputs are moved to the device that holds the model's parameters (CPU when
    the model has none), so the function does not depend on a notebook-level
    ``device`` variable.

    Args:
        model: A ``torch.nn.Module``; it is switched to eval mode.
        data_loader: Iterable of ``(inputs, target)`` pairs; targets are ignored.

    Returns:
        A NumPy array of shape ``(num_values, 1)`` holding all model outputs in
        loader order. An empty loader gives an array of shape ``(0, 1)``.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    batch_outputs = []
    with torch.no_grad():
        for inputs, _ in data_loader:
            outputs = model(inputs.to(target_device))
            batch_outputs.append(outputs.cpu().numpy())

    if not batch_outputs:
        return np.empty((0, 1), dtype=np.float32)

    # Concatenate instead of np.array(list): stacking only works when every
    # batch has the same size, which fails for a shorter final batch.
    return np.concatenate([chunk.reshape(-1, 1) for chunk in batch_outputs], axis=0)

In [None]:
import matplotlib.pyplot as plt
# NOTE(review): with the inline notebook backend a figure opened here is
# rendered when this cell ends, so the sizing may not carry over to the cell
# that actually draws the curves — confirm.
plt.figure(figsize=(20,10))
# x positions 0.0, 1.0, ..., 199.0 (200 points).
xx = np.arange(0.0, 200, 1)
# Scaling pair stored for the 'cnt' column; used below as value*std + mean.
mean, std = scale_feature['cnt']
y_pred = predict(net,test_loader)
# Add the bias stored in the checkpoint, then map back with *std + mean.
# NOTE(review): test_loader is built with drop_last=True, so y_pred can have
# fewer rows than y_test — confirm the two are still aligned where compared.
predictions_hsic = (y_pred+checkpoint['bias'])*std + mean
true_target = y_test*std +mean

In [None]:
# Compare the true targets with the model predictions on test rows 1000-1199.
# Resize whichever figure is current: in a notebook the figure opened in an
# earlier cell has already been rendered, so its figsize would not apply here.
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.plot(xx,true_target[1000:1200],label='True')
plt.plot(xx,predictions_hsic[1000:1200],label='HSIC')
plt.xlabel('Sample index within the plotted window')
plt.ylabel('cnt')
# Without a legend call the label= arguments above are never displayed.
plt.legend()
plt.show()