In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split, TensorDataset
from torchvision.transforms import functional as VF
from torchmetrics.classification import BinaryJaccardIndex
from torchvision import models, datasets, tv_tensors
from torchvision.transforms import v2
import pandas as pd
import pandas as pd
import numpy as np
import os
import re
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from tqdm import tqdm
import random
import data_pipeline

In [3]:
import unet
import baseline
import json
# Segmentation network built with one input channel and one output class.
model = unet.UNet(n_channels=1, n_classes=1)
# model = baseline.Baseline()
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
unet_state_dict = torch.load(
  './unet_8.pt',
  map_location=torch.device('cpu'),
  weights_only=True,
)
model.load_state_dict(unet_state_dict)
# Both frames are built from the unprocessed images (use_processed_images=False).
test_df = data_pipeline.build_test_dataframe(use_processed_images=False)
train_df = data_pipeline.build_dataframe(use_processed_images=False)

In [4]:
# Unpack the three values returned by the pipeline helper. X_names is indexed
# as X_names[i][0] later on to recover the name belonging to test sample i.
(X_test, X_names, X_train) = data_pipeline.build_test_dataloaders(
  test_df,
  train_df,
)

In [5]:
# One sample per batch, in dataset order: the prediction loop pairs batch i
# with X_names[i], so the loader must not shuffle.
test_dl = DataLoader(
  dataset=TensorDataset(X_test),
  batch_size=1,
  shuffle=False,
)

In [6]:
# Run the segmentation model over the test set and store, per sample name, the
# thresholded mask flattened into a list of 0.0 / 1.0 values.
predictions = {}
model.eval()
# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
  # tqdm wraps the loader (not the enumerate) so it knows the total and can
  # render a real progress bar instead of a bare iteration counter.
  for index, x in enumerate(tqdm(test_dl)):
    out = model(x[0])  # TensorDataset yields 1-tuples; x[0] is the image batch
    preds = (torch.sigmoid(out) > 0.5).float()
    # Relies on batch_size=1 and an unshuffled loader: batch i <-> X_names[i].
    name = X_names[index][0]
    predictions[name] = preds.flatten().tolist()

0it [00:00, ?it/s]

2538it [00:24, 102.59it/s]


In [7]:
# One row per dictionary key; the list stored under each key is spread
# across the columns.
preds_df = pd.DataFrame.from_dict(data=predictions, orient="index")

In [8]:
# Persist the predictions; the frame's index is written as the first column.
preds_df.to_csv(path_or_buf='KIRBY_predictions_6.csv')

# Pipe Prediction

In [23]:

def get_pipe_id_dataloaders(train_dataframe, valid_dataframe, image_shape=(36, 36)):
  """Turn the train/validation dataframes into tensors for the pipe identifier.

  Despite the name, this returns plain tensors (plus a names array), not
  DataLoader objects.

  Args:
    train_dataframe: frame with a 'data' column and a 'labels' column, each
      holding one array per row.
    valid_dataframe: frame with 'data', 'well_number' and 'filename' columns.
    image_shape: (height, width) every sample is reshaped to. Defaults to the
      original hard-coded 36x36.

  Returns:
    (X_train, X_valid, Y_train, Y_valid, X_names) where the X tensors are
    float32 with shape (-1, 1, height, width), Y_valid is 'well_number' - 1
    and X_names is the vertically stacked 'filename' column.
  """
  height, width = image_shape

  def _stack(column):
    # Each row holds an array; stack the rows into one 2-D tensor.
    return torch.from_numpy(np.vstack(column.to_numpy()))

  def _as_images(column):
    # Replace NaNs (nan_to_num maps them to 0) before casting and reshaping
    # every sample into a single-channel image.
    return torch.nan_to_num(_stack(column)).float().reshape(-1, 1, height, width)

  X_train = _as_images(train_dataframe['data'])
  X_valid = _as_images(valid_dataframe['data'])

  # NOTE(review): train targets come from 'labels' while validation targets come
  # from 'well_number' - 1 -- confirm this asymmetry is intended.
  Y_train = _stack(train_dataframe['labels'])
  # Squeeze only the column axis: a bare squeeze() would also drop the row axis
  # and hand back a 0-d tensor when the validation frame has a single row.
  Y_valid = _stack(valid_dataframe['well_number']).squeeze(dim=1) - 1

  X_names = np.vstack(valid_dataframe['filename'].to_numpy())

  return X_train, X_valid, Y_train, Y_valid, X_names

In [24]:
import pipe_identifier
import json

# NOTE: this rebinds X_train, X_names and model from the segmentation section,
# so every cell below works on the pipe-identification objects.
X_train, X_valid, Y_train, Y_valid, X_names = get_pipe_id_dataloaders(
  data_pipeline.build_dataframe(),
  data_pipeline.build_test_dataframe(),
)
model = pipe_identifier.PipeIdentifier(num_classes=15)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
well_classifier_state = torch.load(
  './well_classifier_2.pt',
  map_location=torch.device('cpu'),
  weights_only=True,
)
# Kept as the last expression so the cell still displays the key-matching result.
model.load_state_dict(well_classifier_state)


<All keys matched successfully>

In [25]:
# One sample per batch, in dataset order: the prediction loop pairs batch i
# with X_names[i], so the loader must not shuffle.
pipe_id_dataloader = DataLoader(
  dataset=TensorDataset(X_valid),
  batch_size=1,
  shuffle=False,
)

In [26]:
# Classify every validation sample and store, per sample name, a one-element
# list holding the predicted class index.
predictions = {}
model.eval()
# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
  # tqdm wraps the loader (not the enumerate) so it knows the total and can
  # render a real progress bar instead of a bare iteration counter.
  for index, x in enumerate(tqdm(pipe_id_dataloader)):
    out = model(x[0])  # TensorDataset yields 1-tuples; x[0] is the image batch
    # Softmax is monotonic, so the argmax of the raw scores is the same class.
    # dim=1 picks the class per sample instead of an index into the flattened
    # output, which only coincides with the class when the batch size is 1.
    preds = out.argmax(dim=1)
    # Relies on batch_size=1 and an unshuffled loader: batch i <-> X_names[i].
    name = X_names[index][0]
    predictions[name] = preds.flatten().tolist()

2538it [00:20, 121.54it/s]


In [27]:
# One row per sample name; the single column holds the predicted class index.
preds_df = pd.DataFrame.from_dict(data=predictions, orient="index")

In [28]:
# Tally how often each class index was predicted, most frequent first.
preds_df.value_counts(sort=True, ascending=False)

12    2397
5       87
10      54
Name: count, dtype: int64

In [21]:
# Classify every training sample and store a one-element list holding the
# predicted class index, keyed by the sample's position in X_train.
predictions = {}
model.eval()
# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
  # tqdm wraps the tensor (not the enumerate) so it knows the total.
  for index, x in enumerate(tqdm(X_train)):
    out = model(x.unsqueeze(dim=0))  # add the batch axis the model expects
    # Softmax is monotonic, so the argmax of the raw scores is the same class.
    preds = out.argmax(dim=1)
    # The key used to be Y_train[index][0]: a tensor element of the labels,
    # which hashes by object identity and is not a sample name. The integer
    # position is a stable key that lines up with Y_train[index] for comparison.
    predictions[index] = preds.flatten().tolist()

4717it [00:31, 150.14it/s]


In [22]:
# One row per dictionary key; the single column holds the predicted class index.
preds_df = pd.DataFrame.from_dict(data=predictions, orient="index")

# Performance so far


- Unet 5  - 0.38 validation IoU - Two Down Convs + All Images Scaled + Bulk Augments
- Unet 8  - 0.64 validation IoU - Two Down Convs + No Scaling + Random Augments
- Unet 9  - 0.63 validation IoU - Three Down Convs + No Scaling + Random Augments
- Unet 10 - 0.57 validation IoU - One Down Conv + No Scaling + Random Augments


On the test set, however, our results are much worse. It also appears that the scaling does not make much of a difference.

Looking at the output, we are just predicting 0.0 for everything, which is how we wind up with an unchanged score.