In [213]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
import pandas as pd
from sklearn.model_selection import train_test_split

from tqdm import tqdm

In [214]:
df = pd.read_csv("data\df_01.csv", index_col=[0])

In [215]:
y = df[['FORCE_2020_LITHOFACIES_LITHOLOGY']]
X = df.drop(columns=['FORCE_2020_LITHOFACIES_LITHOLOGY', 'FORCE_2020_LITHOFACIES_CONFIDENCE'])

In [216]:
lithology_keys = {30000: 0,
                  65030: 1,
                  65000: 2,
                  80000: 3,
                  74000: 4,
                  70000: 5,
                  70032: 6,
                  88000: 7,
                  86000: 8,
                  99000: 9,
                  90000: 10,
                  93000: 11}

In [217]:
y['FORCE_2020_LITHOFACIES_LITHOLOGY'] = y['FORCE_2020_LITHOFACIES_LITHOLOGY'].map(lithology_keys)
y = y['FORCE_2020_LITHOFACIES_LITHOLOGY']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y['FORCE_2020_LITHOFACIES_LITHOLOGY'] = y['FORCE_2020_LITHOFACIES_LITHOLOGY'].map(lithology_keys)


In [218]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
class CustomDataset(Dataset):

    def __init__(self, dataframe: pd.DataFrame, feature_names: list, target_name: str):
        self.X = torch.tensor(dataframe[feature_names].values, dtype=torch.float32)
        self.y = torch.tensor(dataframe[target_name].values, dtype=torch.float32)
        print(self.X.shape)
        self.X = self.X.reshape(self.X.shape[0], 1, self.X.shape[1])
        print(self.X.shape)


    def __len__(self) -> int:
        return len(self.y)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [220]:
feature_names = ['DEPTH_MD', 'X_LOC', 'Y_LOC', 'Z_LOC', 'CALI', 'RMED', 'RDEP', 'RHOB',
                 'GR', 'NPHI', 'PEF', 'DTC', 'BS', 'ROP', 'DTS', 'DCAL', 'DRHO', 'ROPA']

target_name = 'FORCE_2020_LITHOFACIES_LITHOLOGY'

train_dataset = CustomDataset(dataframe=df, feature_names=feature_names, target_name=target_name)
train_data = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, )

torch.Size([121424, 18])


In [221]:
len(feature_names)

18

In [222]:
model = nn.Sequential(
    nn.Conv1d(64, 1, (18)),
    nn.ReLU(),
    nn.Conv1d(1, 64, (18), padding=1),
    nn.ReLU(),
    nn.Conv1d(64, 64, (9), padding=1),
    nn.ReLU(),
    nn.Conv1d(64, 128, (3), padding=1),
    nn.ReLU(),
    nn.Conv1d(128, 128, (3), padding=1),
    nn.ReLU(),
    nn.Conv1d(128, 128, (1), padding=1),
    nn.Flatten(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 12)
)

In [223]:
optimizer = optim.Adam(params=model.parameters(), lr=0.001, weight_decay=0.001)
loss_function = nn.CrossEntropyLoss()

In [224]:
epochs = 5
model.train()

Sequential(
  (0): Conv1d(64, 1, kernel_size=(18,), stride=(1,))
  (1): ReLU()
  (2): Conv1d(1, 64, kernel_size=(18,), stride=(1,), padding=(1,))
  (3): ReLU()
  (4): Conv1d(64, 64, kernel_size=(9,), stride=(1,), padding=(1,))
  (5): ReLU()
  (6): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (7): ReLU()
  (8): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (9): ReLU()
  (10): Conv1d(128, 128, kernel_size=(1,), stride=(1,), padding=(1,))
  (11): Flatten(start_dim=1, end_dim=-1)
  (12): Linear(in_features=128, out_features=64, bias=True)
  (13): ReLU()
  (14): Linear(in_features=64, out_features=12, bias=True)
)

In [225]:
for e in range(epochs):
    loss_mean = 0
    lm_count = 0

    train_tqdm = tqdm(train_data, leave=True)
    for x_train, y_train in train_tqdm:
        predict = model(x_train)
        loss = loss_function(predict, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lm_count += 1
        loss_mean = 1/lm_count * loss.item() + (1 - 1/lm_count) * loss_mean
        train_tqdm.set_description(f'Epoch [{e+1}/{epochs}], loss_mean={loss_mean:.3f} ')

  0%|          | 0/1898 [00:00<?, ?it/s]


RuntimeError: Given groups=1, weight of size [1, 64, 18], expected input[64, 1, 18] to have 64 channels, but got 1 channels instead

In [None]:
class ConvNet(nn.Module):
    def __init__(self, n_class, in_channels):
        super().__init__()

        self.conv_1 = nn.Conv1d(in_channels=in_channels, out_channels=64, kernel_size=2, padding=1)
        self.conv_2 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=2, padding=1)
