In [None]:
# ============================# Cell 1 - Imports and setup# ============================import osimport randomimport numpy as npimport pandas as pdfrom PIL import Imageimport matplotlib.pyplot as pltfrom sklearn.model_selection import train_test_splitfrom sklearn.metrics import classification_report, confusion_matriximport torchfrom torch import nnfrom torch.utils.data import Dataset, DataLoaderfrom torchvision import transforms, models%matplotlib inlineSEED = 42random.seed(SEED)np.random.seed(SEED)torch.manual_seed(SEED)torch.cuda.manual_seed_all(SEED)torch.backends.cudnn.deterministic = Truetorch.backends.cudnn.benchmark = Falsedevice = torch.device("cuda" if torch.cuda.is_available() else "cpu")print("Using device:", device)

In [None]:
# =========================================
# Cell 2 - Paths and labels dataframe
# =========================================
# Every dataset location is resolved relative to ROOT_DIR.
ROOT_DIR = "."
TRAIN_IMG_DIR, TEST_IMG_DIR, LABELS_PATH = (
    os.path.join(ROOT_DIR, entry)
    for entry in ("train_data", "test_data", "train_labels.csv")
)

# Load the label table that the following cells extend and split.
labels_df = pd.read_csv(LABELS_PATH)

# Show the first rows, then the number of samples per class.
print(labels_df.head())
print("\nClass distribution:")
print(labels_df["label"].value_counts())

In [None]:
# ==================================================
# Cell 3 - Label encoding (string <-> integer)
# ==================================================
# Distinct labels in sorted order; a label's position is its integer id.
unique_labels = sorted(labels_df["label"].unique())
idx_to_label = dict(enumerate(unique_labels))
label_to_idx = {name: number for number, name in idx_to_label.items()}
labels_df["label_idx"] = labels_df["label"].map(label_to_idx)


def make_img_path(fname):
    """Return the path of training image `fname` inside TRAIN_IMG_DIR."""
    return os.path.join(TRAIN_IMG_DIR, fname)


labels_df["img_path"] = labels_df["sample_index"].apply(make_img_path)

# Sanity check: print whether the first few image paths exist on disk.
for p in labels_df["img_path"].head():
    print(p, "->", os.path.exists(p))

In [None]:
# ==========================================
# Cell 4 - Train/Validation split
# ==========================================
# Hold out 20% of the labelled rows for validation, stratified on the integer
# class id. The split is seeded with the notebook-wide SEED rather than a
# second hard-coded literal, so changing SEED changes the split as well.
train_df, val_df = train_test_split(
    labels_df,
    test_size=0.2,
    random_state=SEED,
    stratify=labels_df["label_idx"],
)

# The two subsets must partition the labelled data.
assert len(train_df) + len(val_df) == len(labels_df)

print("Train size:", len(train_df))
print("Val size:", len(val_df))

In [None]:
# ==========================================
# Cell 5 - Custom PyTorch Dataset
# ==========================================
class DoctogresDataset(Dataset):
    """Labelled image dataset backed by a DataFrame.

    Every row of `df` must provide an "img_path" entry (path of the image
    file) and a "label_idx" entry (value convertible to `int`).

    Args:
        df: DataFrame describing the samples. Its index is reset, so samples
            are addressed by position 0 .. len(df) - 1.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return `(image, label)` for the sample at position `idx`."""
        row = self.df.iloc[idx]
        # Open through a context manager so the underlying file handle is
        # released deterministically; convert() yields a separate image that
        # stays usable after the source file is closed.
        with Image.open(row["img_path"]) as source:
            img = source.convert("RGB")
        # Compare against None instead of relying on the transform's truthiness.
        if self.transform is not None:
            img = self.transform(img)
        return img, int(row["label_idx"])

In [None]:
# ==================================================
# Cell 6 - Transforms and Dataloaders
# ==================================================
# Per-channel normalisation statistics and the square input size.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
IMG_SIZE = 224

# Training pipeline: random crop / flips / colour jitter, then tensor + normalise.
train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=IMG_SIZE, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# Validation pipeline: fixed resize + centre crop, no random augmentation.
val_transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(
    dataset=DoctogresDataset(df=train_df, transform=train_transform),
    batch_size=16,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=DoctogresDataset(df=val_df, transform=val_transform),
    batch_size=16,
    shuffle=False,
)

In [None]:
# ==========================================
# Cell 7 - Model definition
# ==========================================
# ResNet-18 loaded with the IMAGENET1K_V1 weights; its final fully connected
# layer is swapped for a new one with one output per label.
num_classes = len(unique_labels)
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(
    in_features=model.fc.in_features,
    out_features=num_classes,
)
model = model.to(device)
print(model)

In [None]:
# =======================================================
# Cell 8 - Loss and optimizer
# =======================================================
# Inverse-frequency class weights from the training split, rescaled so that
# they sum to the number of classes.
class_counts = train_df["label_idx"].value_counts().sort_index().values.astype(float)
class_weights = 1.0 / class_counts
class_weights = class_weights / class_weights.sum() * len(class_counts)

# The NumPy array above is float64 and torch.tensor() keeps that dtype. The
# loss weight must have the same dtype as the model's float32 outputs, so the
# cast is made explicit here.
criterion = nn.CrossEntropyLoss(
    weight=torch.tensor(class_weights, dtype=torch.float32, device=device)
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# ==========================================
# Cell 9 - Train and validation loops
# ==========================================
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over `loader`.

    Args:
        model: network to train; it is switched to training mode.
        loader: iterable yielding `(inputs, targets)` tensor batches.
        criterion: loss called as `criterion(outputs, targets)`.
        optimizer: optimizer that is zeroed and stepped once per batch.
        device: device every batch is moved to.

    Returns:
        `(mean_loss, accuracy)` over all samples seen in this pass.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        pred = out.argmax(dim=1)
        # Scale the batch loss by the batch size so that the final division
        # gives a per-sample figure (assumes the criterion averages per batch).
        loss_sum += loss.item() * x.size(0)
        n_correct += (pred == y).sum().item()
        n_seen += y.size(0)
    if n_seen == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("train_one_epoch: loader yielded no samples")
    return loss_sum / n_seen, n_correct / n_seen


def evaluate(model, loader, criterion, device):
    """Score `model` on `loader` without computing gradients.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        loader: iterable yielding `(inputs, targets)` tensor batches.
        criterion: loss called as `criterion(outputs, targets)`.
        device: device every batch is moved to.

    Returns:
        `(mean_loss, accuracy, targets, predictions)`, where the last two are
        NumPy arrays holding the true and predicted class ids in loader order.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    all_targets = []
    all_preds = []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            out = model(x)
            loss = criterion(out, y)
            pred = out.argmax(dim=1)
            loss_sum += loss.item() * x.size(0)
            n_correct += (pred == y).sum().item()
            n_seen += y.size(0)
            all_targets.extend(y.cpu().tolist())
            all_preds.extend(pred.cpu().tolist())
    if n_seen == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("evaluate: loader yielded no samples")
    return (
        loss_sum / n_seen,
        n_correct / n_seen,
        np.array(all_targets),
        np.array(all_preds),
    )

In [None]:
# ==========================================
# Cell 10 - Main training loop
# ==========================================
# Train for EPOCHS epochs and keep the weights of the epoch with the highest
# validation accuracy, then load those weights back into the model.
EPOCHS = 10
best_acc = 0
best_w = None
for e in range(1, EPOCHS + 1):
    print(f"\nEpoch {e}/{EPOCHS}")
    tr_l, tr_a = train_one_epoch(model, train_loader, criterion, optimizer, device)
    print("Train:", tr_l, tr_a)
    vl_l, vl_a, vt, vp = evaluate(model, val_loader, criterion, device)
    print("Val:", vl_l, vl_a)
    # `best_w is None` forces a snapshot after the first epoch, even when the
    # validation accuracy is 0 and would never exceed the initial best_acc.
    if best_w is None or vl_a > best_acc:
        best_acc = vl_a
        # state_dict() returns references to the live parameter tensors and
        # dict.copy() is shallow, so later optimizer steps would overwrite the
        # saved "best" weights in place. Cloning each tensor freezes them.
        best_w = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
print("Best val acc:", best_acc)
# best_w is still None only if the loop body never ran (EPOCHS < 1).
if best_w is not None:
    model.load_state_dict(best_w)

In [None]:
# ==========================================
# Cell 11 - Validation report
# ==========================================
# Score the model currently held in `model` on the validation split.
vl_l, vl_a, vt, vp = evaluate(model, val_loader, criterion, device)
print("Val acc:", vl_a)

# Pass every class id explicitly. Without `labels`, scikit-learn derives the
# classes from the values present in vt/vp and raises when their number does
# not match len(target_names). Names are cast to str because the report
# formats them as text.
print(
    classification_report(
        vt,
        vp,
        labels=list(range(len(unique_labels))),
        target_names=[str(name) for name in unique_labels],
    )
)

In [None]:
# ==========================================
# Cell 12 - Test dataset
# ==========================================
# List the ".png" files of the test folder in sorted order and pair every
# file name with its full path.
test_files = sorted([f for f in os.listdir(TEST_IMG_DIR) if f.endswith(".png")])
test_df = pd.DataFrame(
    {
        "sample_index": test_files,
        "img_path": [os.path.join(TEST_IMG_DIR, f) for f in test_files],
    }
)


class DoctogresTestDataset(Dataset):
    """Unlabelled image dataset backed by a DataFrame.

    Every row of `df` must provide an "img_path" entry (path of the image
    file) and a "sample_index" entry (identifier returned with the image).

    Args:
        df: DataFrame describing the samples. Its index is reset, so samples
            are addressed by position 0 .. len(df) - 1.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.t = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.df)

    def __getitem__(self, i):
        """Return `(image, sample_index)` for the sample at position `i`."""
        r = self.df.iloc[i]
        # Open through a context manager so the underlying file handle is
        # released deterministically; convert() yields a separate image that
        # stays usable after the source file is closed.
        with Image.open(r["img_path"]) as source:
            img = source.convert("RGB")
        # Compare against None instead of relying on the transform's truthiness.
        if self.t is not None:
            img = self.t(img)
        return img, r["sample_index"]


# The test images go through the validation transform, in a fixed order.
test_loader = DataLoader(
    DoctogresTestDataset(test_df, val_transform),
    batch_size=16,
    shuffle=False,
)

In [None]:
# ==========================================
# Cell 13 - Submission
# ==========================================
# Predict a class for every test image, map the class ids back to their label
# names and write the result to submission.csv.
model.eval()
ids = []
preds = []
with torch.no_grad():
    for x, names in test_loader:
        x = x.to(device)
        out = model(x)
        p = out.max(dim=1).indices
        ids.extend(names)
        preds.extend(p.cpu().tolist())

pred_labels = list(map(idx_to_label.__getitem__, preds))

sub = pd.DataFrame({"sample_index": ids, "label": pred_labels})
sub = sub.sort_values(by="sample_index")
sub.to_csv("submission.csv", index=False)
print("Submission saved.")