In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under /kaggle/input.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Label table with the 'segment_id' and 'time_to_eruption' columns that
# extract_features() looks up for training segments.
df_train =  pd.read_csv('../input/predict-volcanic-eruptions-ingv-oe/train.csv')

In [None]:
# Loaded from the competition's sample_submission.csv.
# NOTE(review): df_test is not referenced again in the visible part of the notebook.
df_test = pd.read_csv('../input/predict-volcanic-eruptions-ingv-oe/sample_submission.csv')

In [None]:
# Integer segment ids of every CSV file under the train directory.
# The id is parsed from the file name itself instead of stripping a
# hard-coded path prefix, and the ids are sorted because os.walk gives no
# ordering guarantee (the validation split below takes the *last* 1000 ids).
id_list_train = []
for dirname, _, filenames in os.walk('../input/predict-volcanic-eruptions-ingv-oe/train'):
    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext == '.csv':
            id_list_train.append(int(stem))

# np.int was removed in NumPy 1.24; use an explicit 64-bit integer dtype.
id_list_train = np.sort(np.asarray(id_list_train, dtype=np.int64))

display(len(id_list_train), id_list_train)

In [None]:
# Hold out the last 1000 ids for validation and keep the rest for training.
# Note: id_list_train is rebound here, so running this cell a second time
# removes another 1000 ids from the training list.
id_list_train, id_list_val = id_list_train[:-1000], id_list_train[-1000:]
print(len(id_list_val), len(id_list_train))

In [None]:
# Integer segment ids of every CSV file under the test directory.
# The id is parsed from the file name itself instead of stripping a
# hard-coded path prefix; ids are sorted because os.walk gives no ordering
# guarantee.
id_list_test = []
for dirname, _, filenames in os.walk('../input/predict-volcanic-eruptions-ingv-oe/test'):
    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext == '.csv':
            id_list_test.append(int(stem))

# np.int was removed in NumPy 1.24; use an explicit 64-bit integer dtype.
id_list_test = np.sort(np.asarray(id_list_test, dtype=np.int64))

display(len(id_list_test), id_list_test)

In [None]:
import matplotlib.pyplot as plt
import scipy as scp
import scipy.signal
import time

In [None]:
def get_spectra(sens_num, id_segment, train):
    """Plot the spectrogram of one sensor column of a segment file.

    Parameters
    ----------
    sens_num : value appended to ``'sensor_'`` to form the column name.
    id_segment : segment id; the file read is ``<id_segment>.csv``.
    train : when truthy the file is read from the ``train`` directory,
        otherwise from the ``test`` directory.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    folder = ('../input/predict-volcanic-eruptions-ingv-oe/train/' if train
              else '../input/predict-volcanic-eruptions-ingv-oe/test/')
    column = 'sensor_' + str(sens_num)
    signal = pd.read_csv(folder + str(id_segment) + '.csv')[column].to_numpy()

    # The second positional argument of spectrogram is the sampling frequency.
    freqs, times, power = scipy.signal.spectrogram(signal, 100)

    plt.figure(figsize=(25,10))
    mesh = plt.pcolormesh(times, freqs, power, shading='auto')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.ylim(0,30)
    plt.colorbar(mesh)
    plt.show()

In [None]:
# Plot the 'sensor_3' spectrogram for the first id in the training list.
id_segment1 = id_list_train[0]
get_spectra(sens_num=3, id_segment=id_segment1, train=True)

In [None]:
def extract_features(id_df, id_segment, train):
    """Compute one spectrogram per sensor column of a segment file.

    Parameters
    ----------
    id_df : table with 'segment_id' and 'time_to_eruption' columns; only
        read when ``train`` is truthy.
    id_segment : segment id; the file read is ``<id_segment>.csv``.
    train : when truthy the file is read from the ``train`` directory and
        the target is looked up in ``id_df``; otherwise the ``test``
        directory is used and the target is ``0``.

    Returns
    -------
    (extracted_features, t)
        ``extracted_features`` is a float array of shape (10, 129, 267)
        holding the spectrogram of the first 10 columns, with NaN replaced
        by 0.0. ``t`` is the array of 'time_to_eruption' values whose
        'segment_id' equals ``id_segment`` (train) or the int 0 (test).
    """
    if train:
        is_segment = id_df['segment_id'] == id_segment
        t = id_df['time_to_eruption'].loc[is_segment].to_numpy()
        path = '../input/predict-volcanic-eruptions-ingv-oe/train/' + str(id_segment) + '.csv'
    else:
        t = 0
        path = '../input/predict-volcanic-eruptions-ingv-oe/test/' + str(id_segment) + '.csv'

    readings = pd.read_csv(path).to_numpy()

    extracted_features = np.zeros([10,129,267])
    for channel in range(10):
        # spectrogram returns (frequencies, times, Sxx); only Sxx is kept.
        # 100 is passed as the sampling frequency.
        spectrum = scipy.signal.spectrogram(readings[:,channel], 100)[2]
        extracted_features[channel] = spectrum
        # In-place on the slice (a view), so the NaN replacement lands in
        # extracted_features itself.
        np.nan_to_num(extracted_features[channel], copy=False, nan=0.0)

    return extracted_features, t

In [None]:
# Run the feature extraction on the first id in the training list and show
# the resulting feature array together with its target.
id_segment1 = id_list_train[0]
extracted_features1, t1 = extract_features(df_train, id_segment1, train=True)
display(extracted_features1, t1)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Pick the compute device once and keep it in `device`, so the choice is
# available as a value instead of only being printed.
# Set USE_GPU = False to force the CPU even when CUDA is available.
USE_GPU = True

device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')
print('using device:', device.type)

In [None]:
# conv test (disabled scratch check of MaxPool2d output sizes).
# Kept as comments rather than inside a triple-quoted string: the string
# had no closing quotes, which made this cell a SyntaxError.
# x = torch.rand((10, 64, 6, 6))
# print(x.size())
# f = nn.MaxPool2d(2, 2, (0, 0))
# y = f(x)
# y.double()
# print(y.size())

In [None]:
# Mean and standard deviation of the training targets; these globals are
# what the evaluation and training code pass to normalize().
mean, std = df_train['time_to_eruption'].mean(), df_train['time_to_eruption'].std()
print(mean, std)

In [None]:
import torchvision.transforms as transforms

In [None]:
def normalize(tensor, mean, std):
    """Standardize ``tensor``: subtract ``mean``, then divide by ``std``."""
    centered = tensor - mean
    return centered / std

def unnormalize(tensor, mean, std):
    """Undo the standardization: multiply ``tensor`` by ``std``, then add ``mean``."""
    scaled = tensor * std
    return scaled + mean

In [None]:
class erupt_predictor(torch.nn.Module):
    """CNN that maps a stack of single-channel spectrograms to one value.

    ``forward`` flattens the convolution output into rows of 128 features,
    reduces every row to a single number with ``fc1``/``fc2``, then
    transposes so that those per-row numbers become the 10 input features of
    ``fc3``. The flattened output must therefore contain exactly 10 rows;
    with the (10, 1, 129, 267) input built by extract_features + prep_data
    in this notebook, the conv/pool stack reduces each map to 1x1, giving
    10 rows (one per sensor).
    """

    def __init__(self):
        super().__init__()

        # Conv2d(in_channels, out_channels, kernel_size)
        self.conv0 = nn.Conv2d(1, 8, 2)
        self.conv1 = nn.Conv2d(8, 16, 3)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.conv4 = nn.Conv2d(64, 64, 3)
        self.conv5 = nn.Conv2d(64, 64, 3)
        self.conv6 = nn.Conv2d(64, 128, 3)

        # MaxPool2d(kernel_size, stride, padding)
        self.pool1 = nn.MaxPool2d(2, 2, (1, 0))
        self.pool2 = nn.MaxPool2d(2, 2, 1)
        self.pool3 = nn.MaxPool2d((2, 4), (2, 4), (1, 0))
        self.pool4 = nn.MaxPool2d(2, 2, 1)
        self.pool5 = nn.MaxPool2d(2, 2, 0)

        self.fc1 = nn.Linear(128, 16)
        self.fc2 = nn.Linear(16, 1)
        self.fc3 = nn.Linear(10, 1)

        # Xavier-uniform initialization of every weight tensor, in
        # declaration order; biases keep their default initialization.
        weighted_layers = (
            self.conv0, self.conv1, self.conv2, self.conv3,
            self.conv4, self.conv5, self.conv6,
            self.fc1, self.fc2, self.fc3,
        )
        for layer in weighted_layers:
            torch.nn.init.xavier_uniform_(layer.weight)

    def forward(self, x):
        """Run the network; returns a 1-D tensor (``reshape(-1)`` of the fc3 output)."""
        act = F.leaky_relu

        # conv0 has no pooling after it; conv1..conv5 are each followed by
        # their pooling layer; conv6 closes the convolutional stack.
        x = act(self.conv0(x))
        conv_pool_stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
            (self.conv4, self.pool4),
            (self.conv5, self.pool5),
        )
        for conv, pool in conv_pool_stages:
            x = pool(act(conv(x)))
        x = act(self.conv6(x))

        # One row of 128 features per input entry, reduced to one number each.
        x = x.view(-1, 128)
        x = act(self.fc2(act(self.fc1(x))))

        # (rows, 1) -> (1, rows): the per-row numbers become fc3's inputs.
        x = torch.transpose(x, 0, 1)
        # NOTE(review): the final leaky_relu scales negative outputs by its
        # negative slope, while the training loop feeds mean/std-normalized
        # targets, which can be negative - confirm this is intended.
        x = act(self.fc3(x))
        return torch.reshape(x, (-1,))

model = erupt_predictor()

In [None]:
# Bare expression: the notebook shows the module's repr as the cell output.
model

In [None]:
# Move the model to the GPU only when it is requested (USE_GPU) and actually
# available, so the notebook also runs on CPU-only machines.
model = model.to('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

In [None]:
# Materialize the model's parameters as a list (the optimizer cell reuses
# `params`), then print how many tensors there are and the size of each.
params = list(model.parameters())
print(len(params))
for param in params:
    print(param.size())

In [None]:
# Training hyper-parameters.
epochCount = 10
learningRate = 1e-6

# Mean-squared-error loss.
criterion = nn.MSELoss()

# Plain SGD with momentum over the parameter list built above.
# Alternative kept for reference:
#optimizer = optim.Adam(params, lr = learningRate, betas = (0.9,0.999), eps = 1e-6)
optimizer = optim.SGD(
    params,
    lr=learningRate,
    momentum=0.9,
    nesterov=False,
)

In [None]:
# Place the loss module on the GPU only when requested and available,
# instead of hard-coding 'cuda'.
criterion = criterion.to('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

In [None]:
def missing_sensors(e_f):
    """Flag the entries of ``e_f`` whose mean is exactly zero.

    Returns
    -------
    (miss_sens_list, sens_num)
        ``miss_sens_list`` has one boolean per entry of ``e_f`` (True where
        ``np.mean(entry) == 0``); ``sens_num`` is ``np.sum`` of that list,
        i.e. the number of flagged entries.
    """
    miss_sens_list = [np.mean(e_f[idx]) == 0 for idx in range(len(e_f))]
    return miss_sens_list, np.sum(miss_sens_list)

In [None]:
def prep_data(e_f, t, train, device=None): # if train=False, write t=None
    """Convert extracted features (and, for training, the target) to float64 tensors.

    Parameters
    ----------
    e_f : array-like of features; a size-1 axis is inserted at dim 1, so
        an input of shape (N, H, W) becomes (N, 1, H, W).
    t : array-like target; only used when ``train`` is truthy.
    train : whether a target tensor should be built.
    device : optional torch device (or device string) for the returned
        tensors. Defaults to 'cuda' when CUDA is available, else 'cpu'.

    Returns
    -------
    (e_f_trch, t_trch)
        ``e_f_trch`` is a float64 tensor on ``device``. ``t_trch`` is a
        float64 tensor on ``device`` when ``train`` is truthy; otherwise it
        is the plain int 0 placeholder.
    """
    if device is None:
        # Fall back to the CPU instead of failing when no GPU is present.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    #miss_sens_list, _ = missing_sensors(e_f)
    #e_f = np.delete(e_f, np.where(miss_sens_list), 0)

    e_f_trch = torch.as_tensor(e_f, dtype=torch.float64).unsqueeze(1).to(device)

    if train:
        t_trch = torch.as_tensor(t, dtype=torch.float64).to(device)
    else:
        # No target for test data. The placeholder is a Python int, so it
        # must not be sent through .to() (that raised AttributeError).
        t_trch = 0

    return e_f_trch, t_trch

In [None]:
import random

In [None]:
def evaluate():
    """Print prediction, target and loss for one random validation segment.

    Reads the notebook globals ``model``, ``criterion``, ``df_train``,
    ``id_list_val``, ``mean`` and ``std``. The model is switched to
    evaluation mode for the forward pass and put back into whatever mode it
    was in before the call, even if an exception is raised.
    """
    # Draw from the actual validation list instead of a hard-coded 1000.
    i = np.random.randint(0, len(id_list_val))
    print('------evaluation------')
    print('segment idx:', i)
    extracted_features, t = extract_features(df_train, id_list_val[i], train=True)

    was_training = model.training
    model.train(mode=False)
    try:
        with torch.no_grad():
            # Features and targets are float64, so the model must be too.
            model.double()
            # Follow the model's device rather than assuming 'cuda'.
            model_device = next(model.parameters()).device

            e_f_trch, t_trch = prep_data(extracted_features, t, train=True)
            e_f_trch = e_f_trch.to(model_device)
            t_trch = normalize(t_trch.to(model_device), mean, std)

            output = model(e_f_trch)
            loss = criterion(output, t_trch)

        ms, sens_num = missing_sensors(extracted_features)
        print('%d sensor(s) missing:' % sens_num, ms)
        print('ground_truth & predicted time:')
        print(t_trch.cpu().numpy(), output.cpu().numpy(), sep='\n')
        print('sensor importance coefficients:', model.fc3.weight.data.cpu().numpy(), sep='\n')
        print('loss:', loss.cpu().numpy())
        print('----------------------')
    finally:
        model.train(mode=was_training)
In [None]:
# Evaluate one random validation segment; this call runs before the training cell.
evaluate()

In [None]:
def evaluate_epoch():
    """Print the mean loss over every segment in ``id_list_val``.

    Reads the notebook globals ``model``, ``criterion``, ``df_train``,
    ``id_list_val``, ``mean`` and ``std``. The model is switched to
    evaluation mode while the losses are computed and put back into its
    previous mode afterwards, even if an exception is raised.
    """
    n_val = len(id_list_val)
    print('---epoch-evaluation---')
    if n_val == 0:
        # Nothing to average; avoid dividing by zero.
        print('mean validation loss:', float('nan'))
        print('----------------------')
        return

    epoch_loss = 0.0
    was_training = model.training
    model.train(mode=False)
    try:
        with torch.no_grad():
            # Features and targets are float64, so the model must be too.
            model.double()
            # Follow the model's device rather than assuming 'cuda'.
            model_device = next(model.parameters()).device

            for i, id_seg in enumerate(id_list_val):
                extracted_features, t = extract_features(df_train, id_seg, train=True)
                e_f_trch, t_trch = prep_data(extracted_features, t, train=True)
                e_f_trch = e_f_trch.to(model_device)
                t_trch = normalize(t_trch.to(model_device), mean, std)

                loss = criterion(model(e_f_trch), t_trch)
                # Accumulate a Python float so the running sum holds no tensors.
                epoch_loss += loss.item()

                if i % 100 == 99:
                    print(i + 1, '/', n_val, sep='')
    finally:
        model.train(mode=was_training)

    epoch_loss /= n_val
    print('mean validation loss:', epoch_loss)
    print('----------------------')

In [None]:
# Mean validation loss; this call runs before the training cell.
evaluate_epoch()

In [None]:
# Training loop: one optimizer step per training segment, a spot check with
# evaluate() every 500 segments and a full validation pass after each epoch.
start_tr = time.time()

# Features and targets are float64, so convert the model once up front.
model.double()
# Follow the model's device rather than assuming 'cuda'.
train_device = next(model.parameters()).device

with torch.enable_grad():
    for epoch in range(epochCount):
        start_ep = time.time()

        model.train(mode=True)

        print('epoch %d' % (epoch + 1))

        start_batch = time.time()
        # New visiting order for every epoch (shuffles id_list_train in place).
        random.shuffle(id_list_train)
        for i, id_seg in enumerate(id_list_train):
            e_f, t = extract_features(df_train, id_seg, train=True)
            e_f_trch, t_trch = prep_data(e_f, t, train=True)
            e_f_trch = e_f_trch.to(train_device)
            t_trch = normalize(t_trch.to(train_device), mean, std)

            # Clear the gradients before every backward pass. Doing this
            # only once per epoch made each step use the sum of all
            # gradients computed since the start of the epoch.
            optimizer.zero_grad()
            output = model(e_f_trch)
            loss = criterion(output, t_trch)
            loss.backward()
            optimizer.step()

            # Per-step loss logging (disabled):
            # running_loss = loss.item()
            # print(running_loss)

            if i % 100 == 99:
                end_batch = time.time()
                print('%d / %d' % (i + 1, len(id_list_train)))
                print('time: %d s' % (end_batch - start_batch))
                start_batch = end_batch

            if i % 500 == 499:
                evaluate()

        evaluate_epoch()
        end_ep = time.time()
        print('time spent: %d s' % (end_ep - start_ep))

end_tr = time.time()
print('sector clear, time spent: %d s' % (end_tr - start_tr))

In [None]:
# Bundle a freshly constructed erupt_predictor instance with the current
# model weights and the optimizer state, then write everything to disk.
# load_checkpoint() reads the 'model' and 'state_dict' entries.
checkpoint = {
    'model': erupt_predictor(),
    'state_dict': model.state_dict(),
    'optimizer' : optimizer.state_dict(),
}

torch.save(checkpoint, 'checkpoint_4.pth')

In [None]:
def load_checkpoint(filepath, map_location=None):
    """Rebuild a model from a checkpoint file written with ``torch.save``.

    The checkpoint must be a dict holding a pickled module under 'model'
    and that module's weights under 'state_dict'.

    Parameters
    ----------
    filepath : path of the checkpoint file.
    map_location : forwarded to ``torch.load``. When left as None and CUDA
        is unavailable, tensors are mapped to the CPU so that a checkpoint
        saved from a GPU can still be loaded.

    Returns
    -------
    The module stored under 'model', with the saved weights loaded, in
    evaluation mode.

    Notes
    -----
    The file is unpickled in full, which can execute arbitrary code. Only
    load checkpoints from a trusted source.
    """
    import inspect

    if map_location is None and not torch.cuda.is_available():
        map_location = 'cpu'

    load_kwargs = {'map_location': map_location}
    # PyTorch versions that know `weights_only` default it to True from 2.6
    # on, which rejects the pickled module object stored under 'model'.
    # Older versions do not accept the keyword at all, hence the check.
    if 'weights_only' in inspect.signature(torch.load).parameters:
        load_kwargs['weights_only'] = False

    checkpoint = torch.load(filepath, **load_kwargs)
    model = checkpoint['model']
    model.load_state_dict(checkpoint['state_dict'])

    model.eval()

    return model

In [None]:
# Replace `model` with the module restored from this checkpoint file and
# print it.
model = load_checkpoint('../input/model-checkpoint/checkpoint (1).pth')
print(model)