In [None]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import read_image
from tqdm import tqdm

In [None]:
# Load the training manifest. Downstream, ocrDataset reads column 0 of this frame
# as the image file name and column 1 as the label.
df = pd.read_csv('train_info.csv')
# Last expression of the cell: show the first rows as a quick sanity check.
df.head()

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Seed everything stochastic so that Restart Kernel -> Run All reproduces the same
# hold-out set, weight initialisation and shuffling order (and therefore metrics).
SEED = 42
torch.manual_seed(SEED)

# 80/20 train / validation split of the manifest rows.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)

In [None]:
class ocrDataset(Dataset):
    """Labelled image dataset driven by a DataFrame manifest.

    Column 0 of ``df`` is read as the image file name and column 1 as the label.

    Args:
        df: manifest with the file name in column 0 and the label in column 1.
        trasform: optional callable applied to the decoded image tensor
            (the misspelled keyword is kept because callers pass it by name).
        target_transform: optional callable applied to the raw label.
        image_dir: directory that the file names are relative to. Defaults to
            ``'train'``, which is the location that used to be hard-coded.
    """

    def __init__(self, df, trasform=None, target_transform=None, image_dir='train'):
        super().__init__()
        self.df = df
        self.transform = trasform
        self.target_transform = target_transform
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of rows in the manifest."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``.

        The label is returned as a float32 tensor of shape ``(1,)``.
        """
        # Positional access: the frame coming out of train_test_split keeps its
        # original (non-contiguous) index labels, so .iloc is required here.
        file_name = self.df.iloc[index, 0]
        label = self.df.iloc[index, 1]
        image = read_image(f'{self.image_dir}/{file_name}')
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        # Add a trailing dimension so a batch of labels has shape (batch, 1).
        label = torch.tensor(label, dtype=torch.float32).unsqueeze(0)
        return image, label

def _build_transform():
    """Build the preprocessing pipeline shared by the training and validation sets.

    Resizes to 128x128, converts to float32 and normalises each of the four
    channels with mean 0.5 and std 0.5.
    """
    steps = [
        transforms.Resize((128, 128)),
        transforms.ConvertImageDtype(torch.float32),
        transforms.Normalize(mean=[0.5, 0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5, 0.5]),
    ]
    return transforms.Compose(steps)


# Two separate pipeline objects, one per split.
train_transform = _build_transform()
test_transform = _build_transform()

train_dataset = ocrDataset(df=train_df, trasform=train_transform)
test_dataset = ocrDataset(df=test_df, trasform=test_transform)

# Only the training batches are shuffled; validation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [None]:

class convBlock(nn.Module):
    """Two conv -> batch-norm -> ReLU stages, then 2x2 max-pooling and channel dropout.

    Args:
        cin: number of input channels.
        cout: number of output channels of both convolutions.
        kernel: convolution kernel size.
        stride: stride used by both convolutions.
        padding: padding used by both convolutions.
        dropout_prob: probability passed to ``nn.Dropout2d``.
    """

    def __init__(self, cin, cout, kernel=(3, 3), stride=1, padding=0, dropout_prob=0.1):
        super().__init__()
        conv_options = dict(kernel_size=kernel, stride=stride, padding=padding)
        # Attribute names double as state_dict key prefixes, so they must stay as is.
        self.c1 = nn.Conv2d(cin, cout, **conv_options)
        self.bn1 = nn.BatchNorm2d(cout)
        self.r1 = nn.ReLU()
        self.c2 = nn.Conv2d(cout, cout, **conv_options)
        self.bn2 = nn.BatchNorm2d(cout)
        self.r2 = nn.ReLU()
        self.p = nn.MaxPool2d(2)
        self.dropout = nn.Dropout2d(dropout_prob)

    def forward(self, x):
        """Apply the stages in order and return the pooled, dropped-out feature map."""
        pipeline = (
            self.c1, self.bn1, self.r1,
            self.c2, self.bn2, self.r2,
            self.p, self.dropout,
        )
        for stage in pipeline:
            x = stage(x)
        return x

class ComplexNet(nn.Module):
    """Five ``convBlock`` stages followed by a three-layer fully connected head.

    The first block takes 4 input channels and the head ends in a single output
    unit (a raw logit; no sigmoid is applied here). ``fc1`` expects a flattened
    512 x 4 x 4 feature map.
    """

    def __init__(self):
        super().__init__()
        # Channel widths of consecutive blocks: 4 -> 64 -> 128 -> 256 -> 256 -> 512.
        widths = (4, 64, 128, 256, 256, 512)
        for position, (cin, cout) in enumerate(zip(widths, widths[1:]), start=1):
            # Registered as cb1 .. cb5 so the state_dict keys stay the same.
            setattr(self, f'cb{position}', convBlock(cin, cout, kernel=(3, 3), padding=1))
        self.f = nn.Flatten()
        self.fc1 = nn.Linear(512 * 4 * 4, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 64)
        self.o = nn.Linear(64, 1)

    def forward(self, x):
        """Return one logit per sample for a batch of images."""
        for block in (self.cb1, self.cb2, self.cb3, self.cb4, self.cb5):
            x = block(x)
        hidden = self.fc1(self.f(x))
        # Dropout is applied before the activation, as in the original layer order.
        hidden = torch.relu(self.dropout(hidden))
        hidden = torch.relu(self.fc2(hidden))
        return self.o(hidden)

# Build the network and move its parameters to the selected device in one step
# (nn.Module.to returns the module itself).
model = ComplexNet().to(device)
# Last expression of the cell: display the module structure.
model

In [None]:
# BCEWithLogitsLoss is fed the raw output of the model's final Linear layer;
# a sigmoid is only applied explicitly where predictions are thresholded.
loss_fn = nn.BCEWithLogitsLoss()
# Adam over all model parameters with a fixed learning rate of 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train_epoch(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Args:
        dataloader: sized iterable yielding ``(inputs, targets)`` batches.
        model: module being trained; it is switched to training mode.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        optimizer: optimizer holding the parameters of ``model``.

    Returns:
        The sum of the per-batch loss values divided by the number of batches.

    Raises:
        ZeroDivisionError: if ``dataloader`` is empty.
    """
    # Dropout and batch-norm behave differently in train and eval mode. Set the mode
    # explicitly so training never inherits eval mode from an earlier evaluation pass.
    model.train()
    train_loss = 0
    num_batches = len(dataloader)
    for x, y in tqdm(dataloader):
        x = x.to(device)
        y = y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        # Clear gradients left over from the previous step before back-propagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    return train_loss / num_batches

def test_epoch(dataloader, model, loss_fn):
    num_batches = len(dataloader)
    test_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for x, y in tqdm(dataloader):
            x = x.to(device)
            y = y.to(device)
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
            pred_binary = (torch.sigmoid(pred) > 0.5).float()
            all_preds.extend(pred_binary.cpu().numpy().flatten())
            all_labels.extend(y.cpu().numpy().flatten())
    test_loss = test_loss / num_batches
    f1 = f1_score(all_labels, all_preds)
    return test_loss, f1

epochs = 20  # number of full passes over train_loader
for t in range(epochs):
    # t is zero-based; the banner shows a one-based epoch number.
    print(f"Epoch {t+1}\n-------------------------------")
    train_loss = train_epoch(train_loader, model, loss_fn, optimizer)
    print(f"Train Loss: {train_loss:>7f}")
    # Evaluate on the held-out split after every epoch.
    test_loss, test_f1 = test_epoch(test_loader, model, loss_fn)
    print(f"Test Loss: {test_loss:>7f}")
    print(f"Test F1-Score: {test_f1:>7f}\n")

# Only the weights as they are after the final epoch are written to disk.
torch.save(model.state_dict(), 'complex_model.pth')

In [None]:
# Rebuild the architecture and restore the trained weights for inference.
model = ComplexNet()
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written during a CUDA run can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load('complex_model.pth', map_location=device))
model.to(device)
# Inference mode: disables dropout and makes batch-norm use its running statistics.
model.eval()

# Same preprocessing as the one applied to the training / validation images.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ConvertImageDtype(torch.float32),
    transforms.Normalize(mean=[0.5, 0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5, 0.5])
])

class PredictDataset(Dataset):
    """Unlabelled image dataset over the image files found directly in a directory.

    Args:
        image_dir: directory that is listed (non-recursively) for image files.
        transform: optional callable applied to each decoded image tensor.

    Attributes are documented inline in ``__init__``.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, image_dir, transform=None):
        super().__init__()
        self.image_dir = image_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the sample
        # order (and therefore batch composition) reproducible across runs.
        # Lower-casing the name also accepts files such as "IMG.PNG".
        self.image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_files)

    def __getitem__(self, index):
        """Return ``(image, file_name)`` for the file at position ``index``."""
        file_name = self.image_files[index]
        image = read_image(os.path.join(self.image_dir, file_name))
        if self.transform:
            image = self.transform(image)
        # The file name travels with the image so predictions can be matched back.
        return image, file_name

image_dir = 'test'
predict_dataset = PredictDataset(image_dir, transform=transform)
# Fixed order so each prediction lines up with its file name.
predict_loader = DataLoader(predict_dataset, batch_size=32, shuffle=False)

results = []
with torch.no_grad():
    for images, file_names in predict_loader:
        # Logits -> probabilities, then a hard decision at the 0.52 cut-off.
        probs = torch.sigmoid(model(images.to(device)))
        preds = (probs > 0.52).float()

        batch_probs = probs.cpu().numpy()
        batch_preds = preds.cpu().numpy()
        # One record per image; each row of the batch arrays holds a single value.
        results.extend(
            {
                'file_name': name,
                'probability': prob_row[0],
                'prediction': int(pred_row[0]),
            }
            for name, prob_row, pred_row in zip(file_names, batch_probs, batch_preds)
        )

results_df = pd.DataFrame(results)
results_df.head()

In [None]:
# The part of each file name before the first '.' is parsed as an integer id.
index = [int(name.split('.')[0]) for name in results_df['file_name']]
results_df['index'] = index
# Use the id both as a regular column and as the row index, then order rows by it.
results_df.set_index('index', drop=False, inplace=True)
results_df.sort_index(inplace=True)

In [None]:

# Build the submission as a new frame instead of assigning into a slice of
# results_df, which would trigger pandas' SettingWithCopyWarning.
# Output columns: 'type' holds the predicted class, 'answer' is a constant 0.
# NOTE(review): the numeric id that initially fills 'answer' is overwritten with 0
# and the row index is not written, so the CSV relies purely on the sorted row
# order. Confirm this matches the expected submission format.
submision = (
    results_df[['prediction', 'index']]
    .rename(columns={'prediction': 'type', 'index': 'answer'})
    .assign(answer=0)
)
submision.to_csv('submission.csv', index=False)