In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import time
from tqdm import tqdm

### Модель:

In [2]:
# GCN Layer:
class GCNLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        
        self.dense = nn.Linear(in_dim, out_dim)

    def forward(self, adj, X):
        adj = adj + torch.eye(adj.size(0))#.to(adj.device)
        h = self.dense(X)
        norm = adj.sum(1)**(-1/2)
        h = norm[None, :] * adj * norm[:, None] @ h
        return h

In [3]:
# Graph construction module:  
class Graph_tanh(nn.Module):
    def __init__(self, num_nodes, k, alpha, device):
        super(Graph_tanh, self).__init__()
        self.num_nodes = num_nodes
        self.k = k
        self.alpha = alpha
        self.device = device
        
        self.A = nn.Parameter((torch.randn(num_nodes, num_nodes)+1), requires_grad=True)#.to(device)

    def forward(self, idx):
        
        adj = torch.tanh(0.1*self.A)
        
        if self.k:
            mask = torch.zeros(idx.size(0), idx.size(0))#.to(self.device)
            mask.fill_(float('0'))
            s1,t1 = (adj + torch.rand_like(adj)*0.01).topk(self.k,1)
            mask.scatter_(1,t1,s1.fill_(1))
            adj = adj*mask
            
        return adj

In [4]:
class Graph_Directed_A(nn.Module):
    def __init__(self, num_nodes, window_size, k, alpha, device):
        super(Graph_Directed_A, self).__init__()
        
        self.alpha = alpha
        self.k = k
        self.device = device
        
        self.e1 = nn.Embedding(num_nodes, window_size)
        self.e2 = nn.Embedding(num_nodes, window_size)
        self.l1 = nn.Linear(window_size,window_size)
        self.l2 = nn.Linear(window_size,window_size)
        
    def forward(self, idx):
        
        m1 = torch.tanh(self.alpha*self.l1(self.e1(idx)))
        m2 = torch.tanh(self.alpha*self.l2(self.e2(idx)))
        adj = F.relu(torch.tanh(self.alpha*torch.mm(m1, m2.transpose(1,0))))
        
        if self.k:
            mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device)
            mask.fill_(float('0'))
            s1,t1 = (adj + torch.rand_like(adj)*0.01).topk(self.k,1)
            mask.scatter_(1,t1,s1.fill_(1))
            adj = adj*mask
        
        return adj

In [5]:
# Graph Neural Network
class GNN(nn.Module):    
    def __init__(self, num_nodes=108, window_size=10, nhidden=15, alpha=1, k=None, out_channels=108, device=None):
        super(GNN, self).__init__()
        self.window_size = window_size
        self.nhidden = nhidden
        self.num_nodes = num_nodes
        self.device = device
        self.idx = torch.arange(self.num_nodes).to(device)
        self.adj = None
        self.z = (torch.ones(num_nodes, num_nodes) - torch.eye(num_nodes))#.to(device)
        
        #self.graph_struct = Graph_Directed_A(num_nodes,  window_size, k, alpha, device)
        self.graph_struct = Graph_tanh(num_nodes, k, alpha, device)
        self.conv1 = GCNLayer(window_size, nhidden)
        self.bnorm1 = nn.BatchNorm1d(num_nodes)
        self.conv2 = GCNLayer(nhidden, out_channels)
        self.bnorm2 = nn.BatchNorm1d(num_nodes)
        
        #self.fc = nn.Linear(nhidden, out_channels)
    
    
    def forward(self, X):
        
        #X = X.to(self.device)
        self.adj = self.graph_struct(self.idx)
        self.adj = self.adj * self.z
        h = self.conv1(self.adj, X).relu()
        h = self.bnorm1(h)
        #skip = torch.squeeze(h)
        h = self.conv2(self.adj, h).relu()
        h = self.bnorm2(h)
        h = torch.squeeze(h)
        #h += skip
                
        #h = torch.cat(self.h, 1)
        #output = self.fc(h).relu()
        
        return h
    
    def get_adj(self):
        return self.adj

### Цикл предсказаний:

In [6]:
for file_idx in tqdm(range(1, 4446)):
    # загрузка файла:
    try:
        sheet1 = pd.read_excel('Test_input_'+str(file_idx)+'.xlsx') # месячные данные
        df = sheet1.iloc[:,1:].copy()
        Q = 1
        try:
            sheet2 = pd.read_excel('Test_input_'+str(file_idx)+'.xlsx', sheet_name='Quarterly') # квартальные данные
        except:
            Q = 0

        # Объединение месячных и квартальных данных:
        if Q == 1:
            cols_sheet2 = sheet2.iloc[:,1:].columns # список столбцов в квартальных данных
        cols_sheet1 = df.columns
        if Q == 1:       
            for column in cols_sheet2: # добавление квартальных столбцов к месячным с 0й инициализацией
                df[column+'2'] = 0.0
            for column in cols_sheet2: # Заполнение значений квартальных данных в общую таблицу (квартальное значение дублируется для всех 3х месяцев в квартале)
                for i in range(len(df)):
                    j = i//3
                    df[column+'2'].iloc[i] = sheet2[column].iloc[j]

        # индексирование:
        start_forecast = df.shape[0] # индекс с первой неизвестной переменной
        end_train_chunk = 0 # индекс последней известной переменной
        startforecast_idxs = [] # список индексов в которых появляется первая неизвестная переменная для каждого ряда
        mean_values = []
        std_values = []
        for i in range(df.shape[1]):

            # finding indexes where forcasting starts:
            if df.iloc[:,i][df.iloc[:,i] == 'Forecast'].index.empty:
                idx = None
            else:
                idx = df.iloc[:,i][df.iloc[:,i] == 'Forecast'].index[0]
                if idx < start_forecast:
                    start_forecast = idx
                if idx > end_train_chunk:
                    end_train_chunk = idx
            startforecast_idxs.append(idx)

            # feature normalization and replacement:
            mean_v = df.iloc[:idx,i].mean()
            std_v = df.iloc[:idx,i].std()
            df.iloc[:idx,i] = (df.iloc[:idx,i] - mean_v)/std_v
            mean_values.append(mean_v)
            std_values.append(std_v)

        forcasting_length = df.shape[0] - start_forecast # Количество предсказываемых временных меток

        # Параметры данных для модели:
        num_nodes = df.shape[1] # количество 
        in_features = 30 # количество известных периодов в обучающем отрезке

        # Разделение на обучающую и тестовую части:
        # Train data:
        train_mask = df.iloc[start_forecast-in_features:end_train_chunk, :]
        train_mask = np.array((train_mask=='Forecast'))
        train_chunk_length = in_features + (end_train_chunk - start_forecast) # длина обучающего временного отрезка
        start_X = 0
        stop_X = train_chunk_length-1
        start_y = stop_X - (end_train_chunk - start_forecast) + 1
        stop_y = start_y + forcasting_length - 1
        x = df.loc[start_X:stop_X].mask(np.array(train_mask),0.0)
        Train_X = torch.Tensor([np.array(x, dtype='float')])
        Train_y = torch.Tensor([np.array(df.loc[start_y:stop_y], dtype='float')])

        while stop_y < start_forecast - 1:
            start_X += 1
            stop_X += 1
            start_y += 1
            stop_y += 1
            x = df.loc[start_X:stop_X].mask(np.array(train_mask),0.0)
            Train_X = torch.cat((Train_X, torch.Tensor([np.array(x, dtype='float')])), 0)
            Train_y = torch.cat((Train_y, torch.Tensor([np.array(df.loc[start_y:stop_y], dtype='float')])), 0)
        Train_X = torch.transpose(Train_X, 1, 2)
        Train_y = torch.transpose(Train_y, 1, 2)

        # Test sample:
        start_X += forcasting_length
        stop_X += forcasting_length
        x = df.loc[start_X:stop_X].mask(np.array(train_mask),0.0)
        Test_X = torch.Tensor([np.array(x, dtype='float')])
        Test_X = torch.transpose(Test_X, 1, 2)

        # Создание модели:
        model = GNN(num_nodes=num_nodes, window_size=train_chunk_length, nhidden=128, out_channels=forcasting_length)
        opt = Adam(model.parameters(), lr=0.001)

        # Обучение:
        for i in range(500):
            model.train()
            logits = model(Train_X)

            loss = F.mse_loss(logits, Train_y)
            opt.zero_grad()
            loss.backward()
            opt.step()

        # Предсказание:
        model.eval()
        with torch.no_grad():
            logits = model(Test_X)

        y = logits.numpy()
        for i in range(num_nodes):
            y[i] = y[i] * std_values[i] + mean_values[i]
        a = pd.DataFrame(np.transpose(y))

        # Выгрузка ответа:
        for i in range(len(cols_sheet1)):
            if startforecast_idxs[i] is None:
                continue
            for j in range(sheet1.shape[0] - startforecast_idxs[i]):
                k = startforecast_idxs[i] + j
                sheet1.iloc[k,i+1] = a.iloc[j,i]
        if Q==1:
            for i in range(len(cols_sheet2)):
                val = []
                for k in range(0,len(a),3):
                    val.append(np.sum(a.iloc[k:k+3,i+len(cols_sheet1)])/3)
                if startforecast_idxs[i] is None:
                    continue
                for j in range(sheet2.shape[0] - startforecast_idxs[i]//3):
                    k = startforecast_idxs[i]//3 + j - 1
                    sheet2.iloc[k,i+1] = val[j]
        with pd.ExcelWriter('Test_output_'+str(file_idx)+'.xlsx') as writer:  
            sheet1.to_excel(writer, sheet_name='Monthly', index=False)
            if Q==1:
                sheet2.to_excel(writer, sheet_name='Quarterly', index=False,)
    except:
        continue

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
  Train_X = torch.Tensor([np.array(x, dtype='float')])
  loss = F.mse_loss(logits, Train_y)
100%|██████████| 4445/4445 [1:51:06<00:00,  1.50s/it]  
