In [41]:
import torch
from torch import nn, optim

import pandas as pd
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm


In [42]:
class CustomDataset(Dataset):

    def __init__(self, dataframe: pd.DataFrame, feature_names: list, target_name: str):
        self.X = torch.tensor(dataframe[feature_names].values, dtype=torch.float32)
        self.X = self.X.reshape(self.X.shape[0], 1, self.X.shape[1])
        self.y = torch.tensor(dataframe[target_name].values, dtype=torch.float32)
        self.y = self.y.type(torch.LongTensor)
        

    def __len__(self) -> int:
        return len(self.y)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [43]:
df = pd.read_csv("data\df_01.csv", index_col=[0])

In [44]:
lithology_keys = {30000: 0,
                  65030: 1,
                  65000: 2,
                  80000: 3,
                  74000: 4,
                  70000: 5,
                  70032: 6,
                  88000: 7,
                  86000: 8,
                  99000: 9,
                  90000: 10,
                  93000: 11}

In [45]:
df['FORCE_2020_LITHOFACIES_LITHOLOGY'] = df['FORCE_2020_LITHOFACIES_LITHOLOGY'].map(lithology_keys)

In [46]:
feature_names = ['DEPTH_MD', 'X_LOC', 'Y_LOC', 'Z_LOC', 'CALI', 'RMED', 'RDEP', 'RHOB',
                 'GR', 'NPHI', 'PEF', 'DTC', 'BS', 'ROP', 'DTS', 'DCAL', 'DRHO', 'ROPA']

target_name = 'FORCE_2020_LITHOFACIES_LITHOLOGY'

train_dataset = CustomDataset(dataframe=df, feature_names=feature_names, target_name=target_name)
train_data = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, )

In [47]:
class DoubleConv(nn.Module):
    
    def __init__(self, in_size: int, out_size: int):
        super(DoubleConv, self).__init__()

        self.conv_net = nn.Sequential(
            nn.Conv1d(in_size, out_size, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(out_size, out_size, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv_net(x)


In [48]:
class DownSample(nn.Module):

    def __init__(self, in_size: int, out_size: int):
        super(DownSample, self).__init__()

        self.conv = DoubleConv(in_size, out_size)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)

    def forward(self, x):
        down = self.conv(x)
        p = self.pool(down)
        return down, p



In [49]:
class UpSample(nn.Module):

    def __init__(self, in_size: int, out_size: int):
        super(UpSample, self).__init__()

        self.up = nn.ConvTranspose1d(in_size, in_size//2, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_size, out_size)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        x = torch.cat([x1, x2])
        return self.conv(x)



In [50]:
class UNet(nn.Module):

    def __init__(self, in_size: int, num_classes: int):
        super(UNet, self).__init__()

        self.down_conv_1 = DownSample(in_size, 64)
        self.down_conv_2 = DownSample(64, 128)
        self.down_conv_3 = DownSample(128, 256)

        self.bottle_nack = DoubleConv(256, 512)

        self.up_conv_1 = UpSample(512, 256)
        self.up_conv_2 = UpSample(256, 128)
        self.up_conv_3 = UpSample(128, 64)

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes),
            nn.Softmax()
        )

    def forward(self, x):

        down_1, p1 = self.down_conv_1(x)
        down_2, p2 = self.down_conv_2(p1)
        down_3, p3 = self.down_conv_3(p2)

        b = self.bottle_nack(p3)

        up_1 = self.up_conv_1(b, down_3)
        up_2 = self.up_conv_2(up_1, down_2)
        up_3 = self.up_conv_3(up_2, down_1)

        out = self.fc(up_3)
        return out

In [51]:
model = UNet(1, 12)
print(model)


UNet(
  (down_conv_1): DownSample(
    (conv): DoubleConv(
      (conv_net): Sequential(
        (0): Conv1d(1, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): ReLU(inplace=True)
        (2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (3): ReLU(inplace=True)
      )
    )
    (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (down_conv_2): DownSample(
    (conv): DoubleConv(
      (conv_net): Sequential(
        (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): ReLU(inplace=True)
        (2): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
        (3): ReLU(inplace=True)
      )
    )
    (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (down_conv_3): DownSample(
    (conv): DoubleConv(
      (conv_net): Sequential(
        (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): ReLU(inplace=True)
      

In [52]:
learning_rate = 0.01
batch_size = 64

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

model.train()

UNet(
  (down_conv_1): DownSample(
    (conv): DoubleConv(
      (conv_net): Sequential(
        (0): Conv1d(1, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): ReLU(inplace=True)
        (2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (3): ReLU(inplace=True)
      )
    )
    (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (down_conv_2): DownSample(
    (conv): DoubleConv(
      (conv_net): Sequential(
        (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): ReLU(inplace=True)
        (2): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
        (3): ReLU(inplace=True)
      )
    )
    (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (down_conv_3): DownSample(
    (conv): DoubleConv(
      (conv_net): Sequential(
        (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): ReLU(inplace=True)
      

In [53]:
epochs = 10

for e in range(epochs):
    loss_mean = 0
    lm_count = 0

    train_tqdm = tqdm(train_data, leave=True)
    for x_train, y_train in train_tqdm:
        predict = model(x_train)
        loss = loss_function(predict, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lm_count += 1
        loss_mean = 1/lm_count * loss.item() + (1 - 1/lm_count) * loss_mean
        train_tqdm.set_description(f'Epoch [{e+1}/{epochs}], loss_mean={loss_mean:.3f} ')



  0%|          | 0/1898 [00:00<?, ?it/s]

  0%|          | 0/1898 [00:00<?, ?it/s]


RuntimeError: Given groups=1, weight of size [256, 512, 3], expected input[128, 256, 4] to have 512 channels, but got 256 channels instead