In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip /content/drive/MyDrive/early-detection-of-3d-printing-issues.zip -d /content

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  inflating: /content/images/104/1679267208/1679267698.244203.jpg  
  inflating: /content/images/104/1679267208/1679267698.661616.jpg  
  inflating: /content/images/104/1679267208/1679267699.071359.jpg  
  inflating: /content/images/104/1679267208/1679267699.48775.jpg  
  inflating: /content/images/104/1679267208/1679267699.8902.jpg  
  inflating: /content/images/104/1679267208/1679267700.292825.jpg  
  inflating: /content/images/104/1679267208/1679267700.711052.jpg  
  inflating: /content/images/104/1679267208/1679267701.12365.jpg  
  inflating: /content/images/104/1679267208/1679267701.536303.jpg  
  inflating: /content/images/104/1679267208/1679267701.94097.jpg  
  inflating: /content/images/104/1679267208/1679267702.360286.jpg  
  inflating: /content/images/104/1679267208/1679267702.776695.jpg  
  inflating: /content/images/104/1679267208/1679267703.187971.jpg  
  inflating: /content/images/104/1679267208/1679267703.606154.jpg  
  inflating:

In [3]:
import pandas as pd
import numpy as np
import os

# Read the data
root = '/content/'

data = pd.read_csv(os.path.join(root, 'train.csv'))
data.head()

Unnamed: 0,img_path,printer_id,print_id,has_under_extrusion
0,101/1678589738/1678589914.060332.jpg,101,1678589738,1
1,101/1678589738/1678589914.462857.jpg,101,1678589738,1
2,101/1678589738/1678589914.875075.jpg,101,1678589738,1
3,101/1678589738/1678589915.284495.jpg,101,1678589738,1
4,101/1678589738/1678589915.700486.jpg,101,1678589738,1


In [4]:
# split the data into train and validation
from sklearn.model_selection import train_test_split
train, val = train_test_split(data, test_size=0.2)
# reset the index of the dataframes
train.index = range(train.shape[0])
val.index = range(val.shape[0])
train.head()

Unnamed: 0,img_path,printer_id,print_id,has_under_extrusion
0,103/1678835470/1678835606.404203.jpg,103,1678835470,0
1,102/1678761262/1678761724.179179.jpg,102,1678761262,1
2,022/1672794032/1672794236.540517.jpg,22,1672794032,1
3,101/1678333984/1678334202.545926.jpg,101,1678333984,0
4,102/1678819096/1678819460.666725.jpg,102,1678819096,1


In [5]:
def get_weights(data):
    # group the dataframe by printer_id and print_id
    grouped = data.groupby(['printer_id', 'print_id'])

    # calculate the total number of images for each printer_id and print_id combination
    totals = grouped.size()

    # calculate the number print jobs for each printer
    num_print_jobs = totals.groupby('printer_id').size()

    # calculate the weight of each image
    weights = ( totals.sum() / (totals * num_print_jobs) ).reset_index(name='weights')

    # merge the weights column with the original dataframe
    df = pd.merge(data, weights, on=['printer_id', 'print_id'])

    return df['weights'].tolist()

train_weights = get_weights(train)
#val_weights = get_weights(val)
len(train_weights)

64848

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.autograd as autograd

class ResNet_Classifier(nn.Module):
    def __init__(self, num_domains=5):
        super(ResNet_Classifier, self).__init__()
        resnet = models.resnet18(pretrained=False)
        self.resnet = nn.Sequential(
            resnet.conv1,
            resnet.bn1,
            resnet.relu,
            resnet.maxpool,
            resnet.layer1,
            resnet.layer2,
            resnet.layer3,
            nn.AdaptiveAvgPool2d((1,1)) # GAP layer
        )
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        features = self.resnet(x)
        features = features.view(features.size(0), -1)

        output = self.classifier(features)

        return output

In [11]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.metrics import f1_score
from datetime import datetime
from make_dataset import MyDataset

# construct dataloaders
img_root = root + 'images/'
train_sampler = WeightedRandomSampler(train_weights, len(train_weights), replacement=True)

batch_size = 32
train_dataset = MyDataset(root=img_root, meta_data=train, img_size=(640, 480))
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, num_workers=8, sampler=train_sampler)
val_dataset = MyDataset(root=img_root, meta_data=val, img_size=(640, 480))
val_loader = DataLoader(dataset=val_dataset, batch_size=128, num_workers=8, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# define the model
model = ResNet_Classifier().to(device)

# define the loss function
criterion = nn.BCELoss().to(device)

# define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=4e-6, weight_decay=1e-6)


patience = 3
num_epochs = 9

# train the model
cur_patience = 0
scores = []
for epoch in range(num_epochs):
    start_time = datetime.now()
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels.view(-1, 1).float())
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                    .format(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.item()))

            
    # test the model
    with torch.no_grad():
        true_labels = []
        predicted_labels = []
        for i, (images, labels) in enumerate(val_loader):
            images = images.to(device)
            labels = labels.to(device).view(-1, 1)
            outputs = model(images)
            predicted = (outputs > 0.5).long()
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(predicted.cpu().numpy())

        f1 = f1_score(true_labels, predicted_labels)
        if len(scores) > 0 and f1 < scores[-1]:
            cur_patience += 1
            if cur_patience >= patience:
                print('Early Stopped!')
                break
        else:
            #cur_patience = 0
            if len(scores) == 0 or f1 > max(scores):
                torch.save(model.state_dict(), 'model.pt')
        scores.append(f1)

        print(f'Current impatience: {cur_patience}, Total patience: {patience}')
        print('F1-score of the model on the test images: {} %'.format(100 * f1))
    time_elapsed = datetime.now() - start_time
    time_to_completion = time_elapsed * (num_epochs - epoch - 1)
    print(f'Time elapsed: {time_elapsed}, ETA: {time_to_completion}')



Epoch [1/9], Step [100/2026], Loss: 0.6834
Epoch [1/9], Step [200/2026], Loss: 0.6602
Epoch [1/9], Step [300/2026], Loss: 0.6363
Epoch [1/9], Step [400/2026], Loss: 0.6320
Epoch [1/9], Step [500/2026], Loss: 0.6171
Epoch [1/9], Step [600/2026], Loss: 0.5790
Epoch [1/9], Step [700/2026], Loss: 0.5700
Epoch [1/9], Step [800/2026], Loss: 0.5557
Epoch [1/9], Step [900/2026], Loss: 0.4941
Epoch [1/9], Step [1000/2026], Loss: 0.4355
Epoch [1/9], Step [1100/2026], Loss: 0.4501
Epoch [1/9], Step [1200/2026], Loss: 0.4230
Epoch [1/9], Step [1300/2026], Loss: 0.3290
Epoch [1/9], Step [1400/2026], Loss: 0.3708
Epoch [1/9], Step [1500/2026], Loss: 0.3709
Epoch [1/9], Step [1600/2026], Loss: 0.3341
Epoch [1/9], Step [1700/2026], Loss: 0.3667
Epoch [1/9], Step [1800/2026], Loss: 0.2586
Epoch [1/9], Step [1900/2026], Loss: 0.2383
Epoch [1/9], Step [2000/2026], Loss: 0.2796
Current impatience: 0, Total patience: 3
F1-score of the model on the test images: 92.46826516220028 %
Time elapsed: 0:05:12.4422

In [12]:
from torchvision import transforms
import pandas as pd
import numpy as np
from PIL import Image
import os


sample = pd.read_csv(os.path.join(root, 'sample_submission.csv'))
print(sample.info())

model = ResNet_Classifier().to(device)
model.load_state_dict(torch.load('/content/model.pt'))

# evaluate the model on test set
model.eval()
transform = transforms.Compose([
            transforms.Resize( (640, 480) ),
            transforms.ToTensor(),
        ])
with torch.no_grad():
    for path in sample['img_path']:
        img = Image.open(os.path.join(img_root, path))
        img = transform(img)
        img = img.unsqueeze(0)
        img = img.to(device)
        output = model(img)
        predicted = (output.squeeze() > 0.5).long()
        sample.loc[sample['img_path'] == path, 'has_under_extrusion'] = predicted.item()

print(sample.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25279 entries, 0 to 25278
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   img_path             25279 non-null  object
 1   has_under_extrusion  25279 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 395.1+ KB
None




<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25279 entries, 0 to 25278
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   img_path             25279 non-null  object
 1   has_under_extrusion  25279 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 395.1+ KB
None


In [15]:
sample.to_csv('ans.csv', index=False)