# In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
import pandas as pd
# Read the training and validation tables from the working directory.
# Later code looks up the `image_link` and `entity_value` columns in both.
train_data = pd.read_csv(filepath_or_buffer='train.csv')
val_data = pd.read_csv(filepath_or_buffer='val.csv')

# Step 3: Define a custom dataset class (assuming entity value is continuous)
class CustomDataset(torch.utils.data.Dataset):
    """Dataset pairing image paths with continuous (regression) targets.

    Args:
        image_links: Sequence (list, tuple, pandas Series, ...) of image
            locations. With the default loader these must be local paths
            that can be opened as files -- TODO confirm the `image_link`
            column does not hold remote URLs.
        entity_values: Sequence of numeric targets aligned with
            ``image_links``; each value must be convertible with ``float``.
        transform: Optional callable applied to the loaded image.
        loader: Optional callable ``path -> image``. Defaults to
            torchvision's ``default_loader``, which opens the file and
            converts it to RGB.
    """

    def __init__(self, image_links, entity_values, transform=None, loader=None):
        self.image_links = image_links
        self.entity_values = entity_values
        self.transform = transform
        # The original code called `Image.open` without importing PIL.
        # torchvision's loader is reachable through the already imported
        # `datasets` package and always returns a 3-channel RGB image, which
        # is what a 3-channel `Normalize` transform requires.
        self.loader = datasets.folder.default_loader if loader is None else loader

    @staticmethod
    def _at(values, idx):
        """Return the ``idx``-th element of ``values`` by position."""
        # `series[idx]` on a pandas Series is a *label* lookup and breaks as
        # soon as the index is not 0..n-1 (e.g. after filtering rows), so use
        # `.iloc` whenever the container offers it.
        positional = getattr(values, 'iloc', None)
        return values[idx] if positional is None else positional[idx]

    def __len__(self):
        return len(self.image_links)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``label`` is a 0-dim float tensor built from the matching target.
        """
        image = self.loader(self._at(self.image_links, idx))
        if self.transform is not None:
            image = self.transform(image)

        label = torch.tensor(float(self._at(self.entity_values, idx)))  # continuous target
        return image, label

# Step 4: Define the model
class EfficientNetModel(nn.Module):
    """Pretrained EfficientNet-B0 backbone with a single-output linear head.

    The backbone's own final layer is swapped for ``nn.Identity`` so that its
    output can be fed to ``self.fc``, which maps it to one value per sample.
    """

    def __init__(self):
        super().__init__()
        backbone = EfficientNet.from_pretrained('efficientnet-b0')

        # Remember the width expected by the original classifier, then
        # neutralise that classifier so the backbone passes features through.
        feature_dim = backbone._fc.in_features
        backbone._fc = nn.Identity()

        self.efficientnet = backbone
        # One output unit: the predicted entity value.
        self.fc = nn.Linear(feature_dim, 1)

    def forward(self, x):
        """Run ``x`` through the backbone and the regression head."""
        return self.fc(self.efficientnet(x))

# Step 5: Set up training parameters
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    """Train ``model`` and report the per-epoch loss for both phases.

    Args:
        model: Module to train; it is moved to the module-level ``device``.
        dataloaders: Mapping with ``'train'`` and ``'val'`` entries, each an
            iterable of ``(inputs, labels)`` batches exposing ``.dataset``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        num_epochs: Number of passes over both phases.

    Returns:
        The trained model (on ``device``).

    Note:
        ``device`` is read from module scope and must be defined before this
        function is called.
    """
    model = model.to(device)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs-1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0

            # Iterate over data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward; gradients are only tracked while training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)

                    # A regression head emits (batch, 1) while scalar targets
                    # collate to (batch,). Element-wise losses such as MSELoss
                    # would silently broadcast that pair to (batch, batch) and
                    # optimise the wrong objective, so align the target shape
                    # whenever both tensors hold the same number of elements.
                    # Class-index targets (different element count) are left
                    # untouched.
                    if labels.shape != outputs.shape and labels.numel() == outputs.numel():
                        labels = labels.reshape(outputs.shape)

                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch-mean loss by batch size so the epoch figure
                # is a per-sample average even when the last batch is smaller.
                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)

            print(f'{phase} Loss: {epoch_loss:.4f}')

    return model

# Step 6: Prepare dataset and data loader
# Define the image transformations
# Per-phase preprocessing pipelines. Both phases resize to 224x224, convert to
# a tensor and normalise per channel (the constants look like the usual
# ImageNet mean/std -- confirm against the backbone's expected preprocessing).
data_transforms = {
    phase: transforms.Compose(
        [transforms.Resize((224, 224))]
        # Random flipping is applied to training images only.
        + ([transforms.RandomHorizontalFlip()] if phase == 'train' else [])
        + [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    for phase in ('train', 'val')
}

# Assuming train.csv is loaded and contains `image_link` and `entity_value` columns
# Wrap each table's `image_link` / `entity_value` columns in a dataset that
# applies the preprocessing pipeline of the matching phase.
train_dataset = CustomDataset(
    train_data['image_link'],
    train_data['entity_value'],
    data_transforms['train'],
)
val_dataset = CustomDataset(
    val_data['image_link'],
    val_data['entity_value'],
    data_transforms['val'],
)

# Batches of 32 for both phases; only the training loader shuffles.
dataloaders = {
    phase: DataLoader(dataset, batch_size=32, shuffle=(phase == 'train'))
    for phase, dataset in (('train', train_dataset), ('val', val_dataset))
}

# Step 7: Initialize model, loss, and optimizer
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Regression setup: mean-squared-error loss, Adam over all model parameters.
model = EfficientNetModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Step 8: Train the model for 10 epochs
model = train_model(
    model,
    dataloaders,
    criterion,
    optimizer,
    num_epochs=10,
)

# Step 9: Save only the learned weights (state dict), not the full module
torch.save(model.state_dict(), 'efficientnet_entity_value_model.pth')


# Notebook cell output: NameError: name 'train_data' is not defined