In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import numpy as np
import ipywidgets as widgets
import torch
from torch import nn, functional as F
from torch.utils.data import Dataset, DataLoader

In [None]:
# Length of one daily profile in samples (the notebook treats rows as minutes).
MIN_IN_DAY = 24 * 60
# Number of leading rows kept from the raw table: 120 whole days.
CUTOFF = 120 * MIN_IN_DAY

In [None]:
# Read the preprocessed table from the working directory; the bare `df` on the
# last line renders it as the cell output.
df = pd.read_csv('processed_data.csv')
df

In [None]:
# Build each resident's average daily profile in one pass:
#   1. drop the first column and keep the first CUTOFF rows,
#   2. transpose so every row is one resident (346 residents, 172800 min),
#   3. fold the minute axis into days (346 residents, 120 days, 1440 minutes),
#   4. take the NaN-aware mean over the day axis (346 residents, 1440 minutes).
daily = np.nanmean(
    df.iloc[:CUTOFF, 1:]
    .to_numpy()
    .T
    .reshape(df.shape[1] - 1, -1, MIN_IN_DAY),
    axis=1,
)

In [None]:
@widgets.interact(resident=(0, daily.shape[0]-1))
def plot_daily_load(resident=31):
    """Plot the average daily load curve (one row of ``daily``) of the chosen resident."""
    profile = daily[resident]
    plt.plot(profile)
    plt.show()

In [None]:
def normalized(load):
    """Min-max scale every row of a 2-D array independently.

    Each row is shifted by its own minimum and divided by its own range
    (max - min). A row whose range is zero is divided by 1 instead, so it
    comes out as all zeros rather than triggering a division by zero.

    Returns a new array; ``load`` itself is not modified.
    """
    row_min = load.min(axis=1)[:, None]
    span = load.max(axis=1)[:, None] - row_min
    # Constant rows have a zero range; substitute a neutral divisor.
    np.putmask(span, span == 0., 1.)
    return (load - row_min) / span

In [None]:
# Rescale each row of `daily` (one average daily profile per resident) with normalized().
normalized_daily = normalized(daily)

In [None]:
@widgets.interact(resident=(0, normalized_daily.shape[0]-1))
def plot_normalized_daily(resident=217):
    """Plot the rescaled daily load curve (one row of ``normalized_daily``) of the chosen resident."""
    scaled_profile = normalized_daily[resident]
    plt.plot(scaled_profile)
    plt.show()

In [None]:
# Dataset
class DS(Dataset):
    """In-memory dataset serving one side of a positional train/test split.

    Args:
        data: 2-D array-like of samples, one sample per row.
        sep: Row index of the split. ``train=True`` keeps ``data[:sep]``,
            ``train=False`` keeps ``data[sep:]``.
        train: Which side of the split to expose.
        device: Where to store the tensor. ``None`` (default) selects CUDA
            when it is available and otherwise falls back to the CPU, so the
            dataset also works on machines without a GPU.

    The rows are stored as a float32 tensor with a singleton channel axis
    inserted at position 1, i.e. ``(n_samples, 1, n_features)``; indexing
    returns one ``(1, n_features)`` sample.
    """

    def __init__(self, data, sep, train=True, device=None):
        super().__init__()
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        split = data[:sep] if train else data[sep:]
        # torch.Tensor(...) converts to float32; unsqueeze adds the channel axis.
        self.data = torch.Tensor(split).to(device).unsqueeze(1)

    def __getitem__(self, i):
        return self.data[i]

    def __len__(self):
        return self.data.shape[0]

In [None]:
# Autoencoder with MLP
class AE_MLP(nn.Module):
    """Fully connected autoencoder assembled from a config dict.

    ``cfg`` must provide:
        'activation': module applied after every linear layer (the same
            instance is reused everywhere, including after the last layer
            of both the encoder and the decoder),
        'encoder': list of layer widths, input width first,
        'decoder': list of layer widths, latent width first.

    ``forward`` returns the pair ``(latent, reconstruction)``.
    """

    def __init__(self, cfg):
        super().__init__()
        activation = cfg['activation']

        def build_stack(widths):
            # One Linear + activation pair per consecutive pair of widths.
            layers = []
            for n_in, n_out in zip(widths[:-1], widths[1:]):
                layers += [nn.Linear(n_in, n_out), activation]
            return nn.Sequential(*layers)

        # Encoder layers are created before decoder layers.
        self.encoder = build_stack(cfg['encoder'])
        self.decoder = build_stack(cfg['decoder'])

    def forward(self, x):
        code = self.encoder(x)
        return code, self.decoder(code)

In [None]:
# Configuration for the MLP autoencoder: the encoder narrows 1440 -> 256 -> 64 -> 4
# and the decoder mirrors it back to 1440.
cfg = {
    'activation': nn.ReLU(),
    'encoder': [1440, 256, 64, 4],
    'decoder': [4, 64, 256, 1440]
}
model = AE_MLP(cfg)
# Move the parameters to the GPU (requires CUDA); the returned module is the cell output.
model.cuda()

In [None]:
class View(nn.Module):
    """Module wrapper around ``Tensor.view`` that keeps the batch axis.

    ``shape`` lists the target sizes of every axis after the first; the
    leading (batch) size is taken from the input at call time.
    """

    def __init__(self, shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        target = (x.size(0),) + tuple(self.shape)
        return x.view(target)

# Autoencoder using CNN
    
class AE_CNN(nn.Module):
    """1-D convolutional autoencoder for single-channel, 1440-sample curves.

    ``cfg['activation']`` is the module applied after every encoder stage
    and after the first two decoder stages; the decoder ends in ``Tanh``.

    ``forward`` takes a ``(B, 1, 1440)`` tensor and returns
    ``(latent, reconstruction)`` with shapes ``(B, 1, 16)`` and
    ``(B, 1, 1440)``. The layer sizes are tied to an input length of 1440:
    the last encoder convolution spans exactly the 24 samples left after
    the three pooling stages.
    """

    def __init__(self, cfg):
        super().__init__()
        act = cfg['activation']

        # encoder -- shapes below are (channels, length) per sample
        enc = [
            # (1, 1440) -> conv (16, 1440) -> pool (16, 288)
            nn.Conv1d(1, 16, 5, padding=2),
            nn.MaxPool1d(kernel_size=5),
            act,
            # (16, 288) -> conv (32, 289) -> pool (32, 72)
            nn.Conv1d(16, 32, 4, padding=2),
            nn.MaxPool1d(kernel_size=4),
            act,
            # (32, 72) -> conv (1, 72) -> pool (1, 24)
            nn.Conv1d(32, 1, 3, padding=1),
            nn.MaxPool1d(kernel_size=3),
            act,
            # (1, 24) -> (16, 1): the kernel covers the full length, so this
            # acts as a fully connected layer producing 16 latent values
            nn.Conv1d(1, 16, 24),
            act,
            # now we have our latent vector with shape (B, 16, 1)
        ]

        # decoder -- consumes the latent permuted to (B, 1, 16)
        dec = [
            # (1, 16) -> conv (32, 16) -> upsample (32, 72)
            nn.Conv1d(1, 32, 3, padding=1),
            nn.Upsample(72),
            act,
            # (32, 72) -> conv (16, 73) -> upsample (16, 288)
            nn.Conv1d(32, 16, 4, padding=2),
            nn.Upsample(288),
            act,
            # (16, 288) -> conv (1, 288) -> upsample (1, 1440)
            nn.Conv1d(16, 1, 5, padding=2),
            nn.Upsample(1440),
            nn.Tanh(),
        ]

        self.encoder = nn.Sequential(*enc)
        self.decoder = nn.Sequential(*dec)

    def forward(self, x):
        # (B, 16, 1) -> (B, 1, 16): the decoder's first conv expects one channel.
        latent = self.encoder(x).permute(0, 2, 1)
        recon = self.decoder(latent)
        return latent, recon

In [None]:
# Configuration for the convolutional autoencoder. Rebinding `cfg` replaces the
# dict that was used to build the MLP model.
cfg = {
    'activation': nn.ReLU()
}

# Build the CNN autoencoder on the GPU (requires CUDA) and show its layer summary.
conv_ae = AE_CNN(cfg).cuda()
conv_ae

In [None]:
# Training setup
# At each epoch, randomly shuffle the daily loads, then feed them into the network in mini-batches.
ntraindata = 250  # row index where the test split starts (see test_dataset)
epoch = 100000  # number of passes over train_loader
lr = 1e-3  # Adam learning rate
optim = torch.optim.Adam(conv_ae.parameters(), lr=lr)
loss_fn = nn.MSELoss()
bsz = 125  # training mini-batch size
# NOTE(review): the training split is cut at row 10000, not at ntraindata. If
# normalized_daily has fewer than 10000 rows this keeps every row, so the test
# rows selected below would also be trained on -- confirm this is intended.
train_dataset = DS(normalized_daily, 10000, train=True)
test_dataset = DS(normalized_daily, ntraindata, train=False)
# train_dataset = DS(daily, ntraindata, train=True)
# test_dataset = DS(daily, ntraindata, train=False)
train_loader = DataLoader(train_dataset, batch_size=bsz, shuffle=True)
# A single batch holding the entire test split.
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

# Loss histories; the training loop appends one value to each every 100 epochs.
train_loss = []
test_loss = []

In [None]:
# Train the convolutional autoencoder. Every 100th epoch, record the losses and
# draw three panels: training loss history, test loss history, and one randomly
# chosen test curve against its reconstruction.
for e in range(epoch):
    for ibatch, batch in enumerate(train_loader):
        optim.zero_grad()
        latent, recon = conv_ae(batch)
        loss = loss_fn(recon, batch)
        loss.backward()
        optim.step()

    if e % 100 == 0:
        fig, axs = plt.subplots(1, 3, figsize=(12, 3))
        # Loss of the last mini-batch of this epoch (not an epoch average).
        train_loss.append(loss.item())
        axs[0].plot(train_loss)
        axs[0].set_title('train loss')
        # evaluate network
        with torch.no_grad():
            # Use the built-in next(): DataLoader iterators only implement
            # __next__, so the Python 2 style `.next()` call raises
            # AttributeError on current PyTorch releases.
            test_data = next(iter(test_loader))
            _, recon = conv_ae(test_data)
            loss = loss_fn(recon, test_data)
            test_loss.append(loss.item())
            axs[1].plot(test_loss)
            axs[1].set_title('test loss')
            vis = np.random.randint(0, test_data.shape[0])
            axs[2].plot(test_data[vis][0].detach().cpu())
            axs[2].plot(recon[vis][0].detach().cpu())
            axs[2].set_title('test sample vs reconstruction')
        plt.show()
        # Release the figure so long runs do not accumulate open figures.
        plt.close(fig)


In [None]:
# Overwrite conv_ae's parameters with a saved checkpoint; any weights produced
# by training earlier in the session are replaced.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
conv_ae.load_state_dict(torch.load('models/AE_CNN_d=16.pt'))

In [None]:
# Tensor held by the training split, on whatever device DS stored it.
data = train_dataset.data
# Host-side NumPy view of the same samples with the channel axis (axis 1) dropped.
data_c = data.detach().cpu().numpy()[:, 0]

In [None]:
# Encode and reconstruct every sample in `data` in one forward pass. This is
# inference only, so autograd is switched off to avoid building a graph over
# the whole dataset.
with torch.no_grad():
    latent, pred = conv_ae(data)
# Move to host memory and drop axis 1 of both outputs.
latent = latent.cpu().numpy()[:, 0]
pred = pred.cpu().numpy()[:, 0]

In [None]:
# Sanity check on the reconstruction array's dimensions.
pred.shape

In [None]:
@widgets.interact(resident=(0, data.shape[0]-1))
def plot_normalized_daily(resident=217):
    """Overlay one resident's input curve and its reconstruction, then print its latent code.

    NOTE(review): an earlier cell of this notebook defines a function with the
    same name; running this cell rebinds it.
    """
    original, reconstructed = data_c[resident], pred[resident]
    plt.plot(original)
    plt.plot(reconstructed)
    plt.show()
    print(latent[resident])

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Check the dimensions of the latent array before it is passed to KMeans.
latent.shape

In [None]:
# Number of k-means clusters; also sets the range of the cluster selector further down.
clusters = 5

In [None]:
# Cluster the latent codes; random_state is fixed so repeated runs on the same
# latent array give the same assignment.
kmeans = KMeans(n_clusters=clusters, random_state=2000).fit(latent)

In [None]:
# Show both the per-sample cluster labels and the centroids. Only a cell's last
# expression is rendered, so the two are returned together as one tuple.
kmeans.labels_, kmeans.cluster_centers_

In [None]:
def decode(latent_vec: np.ndarray, model):
    """Run a single latent vector through ``model.decoder``.

    Args:
        latent_vec: 1-D array holding one latent code.
        model: Module exposing a ``decoder`` that accepts a
            ``(1, 1, latent_size)`` tensor.

    Returns:
        1-D NumPy array: the decoder output for batch index 0, channel 0.

    The input tensor is created on the same device as the model's
    parameters (CPU when the model has none), so this works for both CPU
    and GPU models.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    # (latent_size,) -> (1, latent_size, 1) -> (1, 1, latent_size)
    latent_tensor = torch.Tensor(latent_vec[None, :, None]).to(device)
    with torch.no_grad():
        decoded = model.decoder(latent_tensor.permute(0, 2, 1))
    return decoded.cpu().numpy()[0, 0, :]

In [None]:
@widgets.interact(Cluster=(0, clusters-1))
def plot_cluster(Cluster):
    """Visualise one k-means cluster.

    Prints the cluster size, then draws every member curve faintly, the
    members' pointwise mean in red, and the decoded cluster centroid in green.
    """
    load = data_c[kmeans.labels_ == Cluster]
    member_mean = load.mean(axis=0)
    decoded_mean = decode(kmeans.cluster_centers_[Cluster], conv_ae)
    print(f'{load.shape[0]} residents')
    plt.plot(load.T, alpha=0.03)
    plt.plot(member_mean, c='red')
    plt.plot(decoded_mean, c='green')
    plt.show()