In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
model_category = 'Women Tshirts'

In [None]:
path_eff_b6 = "../../Models/eff_net_b6_fillna_eff_net_Women Tshirts_preproc.pth"
path_eff_b7 = "../../Models/eff_net_b7_fillna_eff_net_Women Tshirts_preproc.pth"
path_mobile_v3_large = "../../Models/mobile_net_v3_large_fillna_eff_net_Women Tshirts.pth"

In [None]:
train_df_Men_Tshirts = pd.read_csv('../../Preprocessor-FillNA/output/train_fillna_Women Tshirts_effnet_b5_4epochs.csv')
train_df_Men_Tshirts

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,image_path
0,32601,Women Tshirts,8,multicolor,loose,long,default,default,default,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/032...
1,32602,Women Tshirts,8,yellow,loose,long,default,default,long sleeves,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/032...
2,32603,Women Tshirts,8,multicolor,loose,long,default,default,default,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/032...
3,32604,Women Tshirts,8,multicolor,loose,long,default,default,long sleeves,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/032...
4,32605,Women Tshirts,8,multicolor,loose,long,default,default,default,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/032...
...,...,...,...,...,...,...,...,...,...,...,...,...
18769,51370,Women Tshirts,8,white,regular,crop,printed,funky print,short sleeves,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/051...
18770,51371,Women Tshirts,8,white,regular,crop,printed,typography,short sleeves,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/051...
18771,51372,Women Tshirts,8,yellow,regular,crop,printed,typography,short sleeves,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/051...
18772,51373,Women Tshirts,8,white,regular,regular,printed,funky print,short sleeves,regular sleeves,default,/kaggle/input/visual-taxonomy/train_images/051...


In [5]:
attr_columns = train_df_Men_Tshirts.filter(like='attr_').columns.to_list() # Adjust if more attributes exist
print(attr_columns)

['attr_1', 'attr_2', 'attr_3', 'attr_4', 'attr_5', 'attr_6', 'attr_7', 'attr_8']


In [6]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder for each attribute column
label_encoders = {}
for column in attr_columns:
    le = LabelEncoder()
    train_df_Men_Tshirts[column] = le.fit_transform(train_df_Men_Tshirts[column])
    label_encoders[column] = le  # Store the encoder for inverse transformation later if needed

# Check the updated DataFrame
# train_df_Men_Tshirts = train_df_Men_Tshirts.head()

In [7]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and validation sets
train_df1, val_df1 = train_test_split(train_df_Men_Tshirts, test_size=0.15, random_state=42)

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image

# Define custom dataset
class CustomDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None, is_test=False):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform
        self.is_test = is_test  # Flag to indicate if it's test set without labels

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.img_dir + self.dataframe.iloc[idx]['image_path']
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        if self.is_test:  # For test set, just return the image without labels
            return image
        
        # For train/validation set, return image and labels
        labels = self.dataframe.iloc[idx][attr_columns].values
        labels = labels.astype(np.int64)  # Ensure labels are integers
        labels = torch.tensor(labels, dtype=torch.long)
        
        return image, labels
    
    
# Transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Apply Color Jitter
    transforms.RandomHorizontalFlip(),  # Apply Horizontal Flip with 50% probability
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
    
train_dataset = CustomDataset(dataframe=train_df1, img_dir='', transform=transform)
val_dataset = CustomDataset(dataframe=val_df1, img_dir='', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [9]:
import ssl

ssl._create_default_https_context = ssl._create_stdlib_context

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

class MultiOutputModel1(nn.Module):
    def __init__(self, num_classes):
        super(MultiOutputModel1, self).__init__()
        # Use MobileNetV2 as the base model
        self.base_model = models.efficientnet_b6(pretrained=True)
        self.base_model.classifier = nn.Identity()  # Remove original classification layer
        
        # Add an adaptive pooling layer to make sure output is flat
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))  # Convert 2D output to 1D
        
        # Dynamically create a fully connected layer for each attribute
        self.output_layers = nn.ModuleDict()
        for attr, n_classes in num_classes.items():
            self.output_layers[attr] = nn.Linear(2304, n_classes)  # Adjust input to 1280 for MobileNetV2

    def forward(self, x):
        x = self.base_model.features(x)  # Extract features
        x = self.pooling(x)  # Adaptive pool to (1, 1) shape
        x = torch.flatten(x, 1)  # Flatten the output to (batch_size, 1280)
        
        outputs = {}
        # Dynamically compute output for each attribute
        for attr, layer in self.output_layers.items():
            outputs[attr] = layer(x)
        
        return list(outputs.values())

# Example usage
num_classes = {}
for key in attr_columns:
    num_classes[key] = train_df_Men_Tshirts[key].nunique()

model_eff_b6 = MultiOutputModel1(num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_eff_b6.parameters(), lr=0.0001)

Downloading: "https://download.pytorch.org/models/efficientnet_b6_lukemelas-24a108a5.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b6_lukemelas-24a108a5.pth
100%|██████████| 165M/165M [00:02<00:00, 72.4MB/s] 


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

class MultiOutputModel2(nn.Module):
    def __init__(self, num_classes):
        super(MultiOutputModel2, self).__init__()
        # Use MobileNetV2 as the base model
        self.base_model = models.efficientnet_b7(pretrained=True)
        self.base_model.classifier = nn.Identity()  # Remove original classification layer
        
        # Add an adaptive pooling layer to make sure output is flat
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))  # Convert 2D output to 1D
        
        # Dynamically create a fully connected layer for each attribute
        self.output_layers = nn.ModuleDict()
        for attr, n_classes in num_classes.items():
            self.output_layers[attr] = nn.Linear(2560, n_classes)  # Adjust input to 1280 for MobileNetV2

    def forward(self, x):
        x = self.base_model.features(x)  # Extract features
        x = self.pooling(x)  # Adaptive pool to (1, 1) shape
        x = torch.flatten(x, 1)  # Flatten the output to (batch_size, 1280)
        
        outputs = {}
        # Dynamically compute output for each attribute
        for attr, layer in self.output_layers.items():
            outputs[attr] = layer(x)
        
        return list(outputs.values())

# Example usage
num_classes = {}
for key in attr_columns:
    num_classes[key] = train_df_Men_Tshirts[key].nunique()

model_eff_b7 = MultiOutputModel2(num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_eff_b7.parameters(), lr=0.0001)

Downloading: "https://download.pytorch.org/models/efficientnet_b7_lukemelas-c5b4e57e.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b7_lukemelas-c5b4e57e.pth
100%|██████████| 255M/255M [00:01<00:00, 221MB/s] 


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

class MultiOutputModel3(nn.Module):
    def __init__(self, num_classes):
        super(MultiOutputModel3, self).__init__()
        # Use MobileNetV2 as the base model
        self.base_model = models.mobilenet_v3_large(pretrained=True)
        self.base_model.classifier = nn.Identity()  # Remove original classification layer
        
        # Add an adaptive pooling layer to make sure output is flat
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))  # Convert 2D output to 1D
        
        # Dynamically create a fully connected layer for each attribute
        self.output_layers = nn.ModuleDict()
        for attr, n_classes in num_classes.items():
            self.output_layers[attr] = nn.Linear(960, n_classes)  # Adjust input to 1280 for MobileNetV2

    def forward(self, x):
        x = self.base_model.features(x)  # Extract features
        x = self.pooling(x)  # Adaptive pool to (1, 1) shape
        x = torch.flatten(x, 1)  # Flatten the output to (batch_size, 1280)
        
        outputs = {}
        # Dynamically compute output for each attribute
        for attr, layer in self.output_layers.items():
            outputs[attr] = layer(x)
        
        return list(outputs.values())

# Example usage
num_classes = {}
for key in attr_columns:
    num_classes[key] = train_df_Men_Tshirts[key].nunique()

model_mobile_net_v3_large = MultiOutputModel3(num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_mobile_net_v3_large.parameters(), lr=0.0001)

Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth
100%|██████████| 21.1M/21.1M [00:00<00:00, 170MB/s]


In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_eff_b6 = model_eff_b6.to(device)
model_eff_b7 =  model_eff_b7.to(device)
model_mobile_net_v3_large = model_mobile_net_v3_large.to(device)
# Training loop
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, save_path=f"eff_net_b6_fillna_eff_net_{model_category}.pth"):
    scheduler = optim.lr_scheduler.StepLR(optimizer = optimizer , step_size=5, gamma=0.5)
    best_val_loss = float('inf')
    for epoch in tqdm(range(num_epochs)):
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)  # Move tensors to the correct device
            optimizer.zero_grad()
            outputs = model(images)
            loss = sum([criterion(output, label) for output, label in zip(outputs, labels.T)])
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        scheduler.step()
        
        print(f'Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader)}')

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)  # Move tensors to the correct device
                outputs = model(images)
                loss = sum([criterion(output, label) for output, label in zip(outputs, labels.T)])
                val_loss += loss.item()
        
        avg_val_loss = val_loss / len(val_loader)
        print(f'Epoch {epoch+1}, Val Loss: {val_loss/len(val_loader)}')
        
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), save_path)
            print(f"Model saved with Val Loss: {best_val_loss:.4f} at Epoch {epoch+1}")

# Run training
# train(model, train_loader, val_loader, criterion, optimizer, num_epochs=7)

In [14]:
torch.manual_seed(42)
model_eff_b6.load_state_dict(torch.load(f"{path_eff_b6}"))

  model_eff_b6.load_state_dict(torch.load(f"{path_eff_b6}"))


<All keys matched successfully>

In [15]:
torch.manual_seed(42)
model_eff_b7.load_state_dict(torch.load(f"{path_eff_b7}"))

  model_eff_b7.load_state_dict(torch.load(f"{path_eff_b7}"))


<All keys matched successfully>

In [16]:
torch.manual_seed(42)
model_mobile_net_v3_large.load_state_dict(torch.load(f"{path_mobile_v3_large}"))

  model_mobile_net_v3_large.load_state_dict(torch.load(f"{path_mobile_v3_large}"))


<All keys matched successfully>

In [17]:
torch.manual_seed(42)
model_eff_b6.eval()

MultiOutputModel1(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(56, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(56, 56, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=56, bias=False)
              (1): BatchNorm2d(56, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(56, 14, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(14, 56, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
         

In [18]:
torch.manual_seed(42)
model_eff_b7.eval()

MultiOutputModel2(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
              (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
         

In [19]:
torch.manual_seed(42)
model_mobile_net_v3_large.eval()

MultiOutputModel3(
  (base_model): MobileNetV3(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (1): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          )
        )
      )
      (2): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNorm

In [None]:
test_df = pd.read_csv('../../Dataset/test.csv')
test_df

Unnamed: 0,id,Category
0,0,Men Tshirts
1,1,Men Tshirts
2,2,Men Tshirts
3,3,Men Tshirts
4,4,Men Tshirts
...,...,...
30200,30484,Women Tops & Tunics
30201,30485,Women Tops & Tunics
30202,30486,Women Tops & Tunics
30203,30487,Women Tops & Tunics


In [None]:
test_df_Men_Tshirts = test_df[test_df['Category'] == model_category]
test_df_Men_Tshirts['len'] = 8

def format_image_path_test(row):
    return f"../../Dataset/test_images/{str(row['id']).zfill(6)}.jpg"

test_df_Men_Tshirts['image_path'] = test_df_Men_Tshirts.apply(format_image_path_test, axis=1)
test_df_Men_Tshirts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_Men_Tshirts['len'] = 5
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_Men_Tshirts['image_path'] = test_df_Men_Tshirts.apply(format_image_path_test, axis=1)


Unnamed: 0,id,Category,len,image_path
13349,13615,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...
13350,13616,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...
13351,13617,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...
13352,13618,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...
13353,13619,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...
...,...,...,...,...
23275,23559,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...
23276,23560,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...
23277,23561,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...
23278,23562,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...


In [22]:
# test_df_Men_Tshirts = test_df_Men_Tshirts.sample(128)

In [23]:
# Test dataset without labels
test_dataset = CustomDataset(dataframe=test_df_Men_Tshirts, img_dir='', transform=transform, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [24]:
len(test_loader)

311

In [25]:
from tqdm import tqdm  # Use console version of tqdm

predictions = []

with torch.no_grad():
    for images in tqdm(test_loader):  # This will display in the console
        images = images.cuda() if torch.cuda.is_available() else images

        # Forward pass through both models
        torch.manual_seed(42)
        outputs1 = model_eff_b6(images)
        outputs2 = model_eff_b7(images)
        outputs3 = model_mobile_net_v3_large(images)
#         outputs3 = model_eff_b5(images)

        # Initialize a list to hold blended predictions for the batch
        batch_preds = []

        # Loop through the outputs and blend predictions for each attribute
        for out1, out2 ,out3 in zip(outputs1, outputs2,outputs3):
            # Blend logits by averaging
            blended_output = (out1 + out2 + out3)/3

            # Get the predicted classes from the blended output
            _, pred = torch.max(blended_output, 1)
            batch_preds.append(pred.cpu().numpy())  # Move to CPU and convert to numpy

        # Stack predictions along a new dimension and add to predictions list
        predictions.append(np.stack(batch_preds, axis=1))  # Shape: (batch_size, num_attributes)

# Combine all predictions into a single array
predictions = np.concatenate(predictions, axis=0)

# Display final predictions
print(predictions)  # This will be an array with shape (num_samples, num_attributes)


100%|██████████| 311/311 [05:23<00:00,  1.04s/it]

[[5 2 1 ... 0 1 1]
 [3 2 2 ... 2 1 1]
 [1 2 2 ... 2 1 1]
 ...
 [6 2 0 ... 1 0 1]
 [5 2 2 ... 2 1 1]
 [5 1 1 ... 2 1 1]]





In [26]:
# Assuming predictions is a numpy array of shape (N, 5)
# Add new columns attr_1 to attr_10 to test_df
for i in range(1, 11):
    test_df_Men_Tshirts[f'attr_{i}'] = np.nan 

# Assign predictions to attr_1 to attr_5
test_df_Men_Tshirts[attr_columns] = predictions

# Optionally save the updated test_df to CSV
# test_df.to_csv('test_predictions_with_attrs.csv', index=False)

# Display the first few rows of the updated DataFrame
test_df_Men_Tshirts.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_Men_Tshirts[f'attr_{i}'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_Men_Tshirts[f'attr_{i}'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_Men_Tshirts[f'attr_{i}'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

Unnamed: 0,id,Category,len,image_path,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
13349,13615,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,5,2,1,1,5,0,1,1,,
13350,13616,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,3,2,2,0,0,2,1,1,,
13351,13617,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,1,2,2,2,4,2,1,1,,
13352,13618,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,1,2,2,1,0,2,1,1,,
13353,13619,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,0,2,0,1,5,2,1,1,,


In [27]:
# Inverse transform predictions for each attribute using the stored label encoders
for attr in attr_columns:
    # Inverse transform using the corresponding label encoder
    test_df_Men_Tshirts[attr] = label_encoders[attr].inverse_transform(test_df_Men_Tshirts[attr].astype(int))

# Check the updated DataFrame¯ with original attribute names instead of encoded numbers
test_df_Men_Tshirts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_Men_Tshirts[attr] = label_encoders[attr].inverse_transform(test_df_Men_Tshirts[attr].astype(int))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_Men_Tshirts[attr] = label_encoders[attr].inverse_transform(test_df_Men_Tshirts[attr].astype(int))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-

Unnamed: 0,id,Category,len,image_path,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
13349,13615,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,white,regular,long,printed,typography,default,regular sleeves,default,,
13350,13616,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,multicolor,regular,regular,default,default,short sleeves,regular sleeves,default,,
13351,13617,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,default,regular,regular,solid,solid,short sleeves,regular sleeves,default,,
13352,13618,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,default,regular,regular,printed,default,short sleeves,regular sleeves,default,,
13353,13619,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0136...,black,regular,crop,printed,typography,short sleeves,regular sleeves,default,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23275,23559,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...,white,regular,regular,printed,graphic,short sleeves,regular sleeves,default,,
23276,23560,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...,default,regular,crop,printed,quirky,short sleeves,regular sleeves,default,,
23277,23561,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...,yellow,regular,crop,printed,typography,long sleeves,cuffed sleeves,default,,
23278,23562,Women Tshirts,5,/kaggle/input/visual-taxonomy/test_images/0235...,white,regular,regular,printed,funky print,short sleeves,regular sleeves,default,,


In [28]:
# Get value counts for each specified column
columns_of_interest = attr_columns

for column in columns_of_interest:
    print(f"Value counts for {column}:\n")
    print(test_df_Men_Tshirts[column].value_counts(dropna=False))  # Including NaN values
    print("\n" + "-"*50 + "\n")

Value counts for attr_1:

attr_1
white         2979
default       1810
black         1404
yellow        1183
pink          1073
multicolor     871
maroon         611
Name: count, dtype: int64

--------------------------------------------------

Value counts for attr_2:

attr_2
regular    8717
loose      1043
boxy        171
Name: count, dtype: int64

--------------------------------------------------

Value counts for attr_3:

attr_3
regular    5254
crop       3666
long       1011
Name: count, dtype: int64

--------------------------------------------------

Value counts for attr_4:

attr_4
printed    8398
solid       902
default     631
Name: count, dtype: int64

--------------------------------------------------

Value counts for attr_5:

attr_5
typography     2904
funky print    2284
default        1677
quirky         1105
graphic        1039
solid           922
Name: count, dtype: int64

--------------------------------------------------

Value counts for attr_6:

attr_6
short slee

In [None]:
sub_df = pd.read_csv('../../Dataset/sample_submission.csv')
sub_df

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
0,47001,Women Tshirts,8,maroon,regular,crop,printed,typography,short sleeves,regular sleeves,default,nu,nu
1,16431,Sarees,10,same as saree\n,temple border,small border,cream,party,jacquard,woven design,zari woven,ethnic motif,no
2,55700,Women Tops & Tunics,10,white,fitted,regular,round neck,casual,solid,solid,short sleeves,regular sleeves,knitted
3,15698,Sarees,10,same as saree\n,zari,small border,white,party,jacquard,woven design\n,zari woven,floral,no
4,30330,Kurtis,9,yellow,a-line,knee length\n,daily,net,default,solid,three-quarter sleeves,regular,nu


In [30]:
test_df_Men_Tshirts = test_df_Men_Tshirts.fillna('dummy')

In [None]:
sub_df_Men_Tshirts = test_df_Men_Tshirts.drop('image_path', axis = 1)
sub_df_Men_Tshirts.to_csv(f"output/sub_df_{model_category}_effnet_0.33_b7_0.33_b6_0.33_mobile_net_v3_large_blending.csv", index = False)
sub_df_Men_Tshirts.to_csv(f"sub_df_{model_category}_effnet_0.33_b7_0.33_b6_0.33_mobile_net_v3_large_blending.csv", index = False)
sub_df_Men_Tshirts

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
13349,13615,Women Tshirts,5,white,regular,long,printed,typography,default,regular sleeves,default,dummy,dummy
13350,13616,Women Tshirts,5,multicolor,regular,regular,default,default,short sleeves,regular sleeves,default,dummy,dummy
13351,13617,Women Tshirts,5,default,regular,regular,solid,solid,short sleeves,regular sleeves,default,dummy,dummy
13352,13618,Women Tshirts,5,default,regular,regular,printed,default,short sleeves,regular sleeves,default,dummy,dummy
13353,13619,Women Tshirts,5,black,regular,crop,printed,typography,short sleeves,regular sleeves,default,dummy,dummy
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23275,23559,Women Tshirts,5,white,regular,regular,printed,graphic,short sleeves,regular sleeves,default,dummy,dummy
23276,23560,Women Tshirts,5,default,regular,crop,printed,quirky,short sleeves,regular sleeves,default,dummy,dummy
23277,23561,Women Tshirts,5,yellow,regular,crop,printed,typography,long sleeves,cuffed sleeves,default,dummy,dummy
23278,23562,Women Tshirts,5,white,regular,regular,printed,funky print,short sleeves,regular sleeves,default,dummy,dummy
