# Packaging to Price

Cosmetic products' packaging certainly have an influence on their price. Although it could be hard to train a neural network with "sense of beauty" when we only have 6000 images of products, it is worth trying to see how much information we can extract from these pictures...

#### Define neural network structure

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 8, 5, padding=2)
        self.conv2 = nn.Conv2d(8, 8, 5, padding=2)
        self.conv3 = nn.Conv2d(8, 16, 3, padding=1)
        self.conv4 = nn.Conv2d(16, 16, 3, padding=1)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16*8*8, 32)
        self.fc2 = nn.Linear(32, 1)
        
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.elu(self.conv1(x))
        x = F.max_pool2d(x, (2, 2))
        
        x = F.elu(self.conv2(x))
        x = F.max_pool2d(x, (2, 2))
        
        x = F.elu(self.conv3(x))
        x = F.max_pool2d(x, (2, 2))
        
        x = F.elu(self.conv4(x))
        x = F.max_pool2d(x, (2, 2))
        
        # If the size is a square you can only specify a single number
        x = x.view(-1, self.num_flat_features(x))
        x = self.dropout(x)
        x = F.elu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = x.view(-1)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


model = Net()
print(model)

Net(
  (conv1): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(8, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1024, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.2)
)


#### Define training and prediction functions
The training function has features include:
* batch gradient decent
* evaluate on validation data
* early-stopping
* load best weights from best epoch

In [2]:
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm
import copy

def train(train_loader, val_loader=None, model=None, epoch=1, optimizer=None, criterion=None, early_stopping=3):
        
    best_loss = 10000000
    best_epoch = 0
    best_model_wts = None
    
    for t in range(epoch):
        
        model.train()
        
        for i_batch, batch in tqdm(enumerate(train_loader)):
            
            batch_X, batch_y = batch
            batch_y_pred = model(batch_X)
            loss = criterion(batch_y_pred, batch_y.type(torch.FloatTensor))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        if val_loader is not None: #evaluate on validation data
            
            model.eval()
            running_loss = 0.0
            with torch.no_grad():
                for i_batch, batch in enumerate(val_loader):
                    batch_X, batch_y = batch
                    batch_y_pred = model(batch_X)
                    loss = criterion(batch_y_pred, batch_y.type(torch.FloatTensor))                   
                    running_loss += loss.item() * batch_X.size()[0]
        
            epoch_loss = running_loss / len(val_loader.dataset)
            print ("epoch %d, loss %.6f"%(t, epoch_loss))
            
            if epoch_loss < best_loss: #keep track of best loss and epoch
                best_loss = epoch_loss
                best_epoch = t
                best_model_wts = copy.deepcopy(model.state_dict())
                
            if (t - best_epoch > early_stopping): #early stopping if loss haven't improve for n=early_stopping rounds
                break
                
    if best_model_wts is not None:
        print("load best weights from epoch %d"%best_epoch)
        model.load_state_dict(best_model_wts)
        
        
def predict(dataloader, model):
    
    N = len(dataloader.dataset)
    n_batches = len(dataloader)
    batch_size = dataloader.batch_size
    predictions = torch.zeros(N)
        
    model.eval()
    with torch.no_grad():
        for i_batch, batch in tqdm(enumerate(dataloader)):
            batch_X, batch_y = batch
            batch_y_pred = model(batch_X)
                
            start = i_batch * batch_size
            end = start + batch_size
            if i_batch == n_batches - 1:
                end = N
            predictions[start:end] = batch_y_pred   
                
    return predictions

#### Define dataset

In [3]:
from skimage import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

class ImagesWithPaths(Dataset):
# dataset initialized with given image paths and corresponding labels
    def __init__(self, image_paths, labels, transform=None):
        assert (len(image_paths)==len(labels))
        self.paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        label = self.labels[index]
        path = self.paths[index]
        image = io.imread(path)
        if self.transform:
            image = self.transform(image)
        return image, label       

#### Customize train_test_split and KFold

In our dataset, same product of different product category are recorded in multiple rows. We should avoid putting these rows in different folds because that may introduce leakage during training and over results would be over-optimistic. Thus we would do train-test split and KFold on unique products, and map the fold number back to the original dataset. This way in the original dataset, the testset ratio may be slightly off what we want originally, and the folds may also be non-equal sized, but this should not be a big problem.

Do train-test split and KFold on all unique products. "-1" indicate test set, 0-5 indicate the fold in training set.

In [4]:
from sklearn.model_selection import KFold, train_test_split
# read data
df = pd.read_csv("../data_cleaning/images.csv")
df['price'] = df['price'].apply(lambda x: x.replace(',','')).astype('float')
df = df.loc[df['price']<300]

pd.set_option('mode.chained_assignment', None)
# get unique products
folds_df = df[['product_names','brand']].drop_duplicates()
train_folds_df, test_folds_df = train_test_split(folds_df, test_size=0.2, shuffle=True, random_state=777)

test_folds_df['fold'] = -1 # test
train_folds_df['fold'] = 0
n_folds = 5
folds = KFold(n_folds, shuffle=True, random_state=777)
for i, (trn_idx, val_idx) in enumerate(folds.split(train_folds_df)):
    train_folds_df['fold'].iloc[val_idx] = i

folds_df = train_folds_df.append(test_folds_df).set_index(['product_names','brand'])
folds_df['fold'].value_counts()

-1    1554
 0    1244
 3    1243
 2    1243
 1    1243
 4    1243
Name: fold, dtype: int64

Map fold number in original dataset, check that the fold sizes are reasonable.

In [5]:
def dfmap(x, series):
    return series.loc[(x[0],x[1])]
df['fold'] = df[['product_names','brand']].apply(dfmap, series=folds_df['fold'], axis=1)

print('total fold count')
print(df['fold'].value_counts())
df.head()

total fold count
-1    1858
 1    1490
 2    1472
 4    1470
 0    1469
 3    1467
Name: fold, dtype: int64


Unnamed: 0,product_names,brand,price,image_path,islogo,fold
0,Clear Complexion Spot Treatment,Merle Norman,20.0,images/skin_care/Clear-Complexion-Spot-Treatme...,0.963436,2
1,Acne Solutions Emergency Gel Lotion,Clinique,17.0,images/skin_care/Acne-Solutions-Emergency-Gel-...,0.01118,0
2,RESIST Daily Pore-Refining Solution 2% BHA,Paula's Choice Skincare,33.0,images/skin_care/RESIST-Daily-Pore-Refining-So...,0.567055,-1
3,Max Complexion Correction Pads,Peter Thomas Roth,40.0,images/skin_care/Max-Complexion-Correction-Pad...,0.005085,-1
4,Naturals Acne Spot Treatment,Neutrogena,8.49,images/skin_care/Naturals-Acne-Spot-Treatment_...,0.000562,1


In this notebook, we will only train on non-logo images (which we have filtered previously in data cleaning). Check that the folds with non-logo image samples are still good.

In [6]:
image_df = df.loc[~df['image_path'].isnull()]
image_df['image_path'] = image_df['image_path'].apply(lambda x: '../data_cleaning/'+x)
image_df = image_df.loc[image_df['islogo']<0.5].drop_duplicates()
print('fold count for non-logo image samples')
print(image_df['fold'].value_counts())
image_df.head()

fold count for non-logo image samples
-1    1282
 0    1026
 4    1013
 1    1004
 2    1000
 3     999
Name: fold, dtype: int64


Unnamed: 0,product_names,brand,price,image_path,islogo,fold
1,Acne Solutions Emergency Gel Lotion,Clinique,17.0,../data_cleaning/images/skin_care/Acne-Solutio...,0.01118,0
3,Max Complexion Correction Pads,Peter Thomas Roth,40.0,../data_cleaning/images/skin_care/Max-Complexi...,0.005085,-1
4,Naturals Acne Spot Treatment,Neutrogena,8.49,../data_cleaning/images/skin_care/Naturals-Acn...,0.000562,1
6,Benzoyl Peroxide 10%,"Jan Marini Skin Research, Inc.",30.0,../data_cleaning/images/skin_care/Benzoyl-Pero...,0.005258,0
9,Blackhead Dissolving Gel,Proactiv & Proactiv+,25.0,../data_cleaning/images/skin_care/Blackhead-Di...,0.007812,3


#### Training

For training set, we will save the out-of-bag predictions from K-Fold cross validation. For test set, we will use the average prediction of all five folds.

In [7]:
test_df = image_df.loc[(image_df['fold']==-1)]
dataset_test = ImagesWithPaths(test_df['image_path'].values, 
                               test_df['price'].values, 
                               transform=transforms.ToTensor()) 
test_loader = DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=8)

oof_preds = pd.DataFrame()
test_preds = pd.DataFrame({'image_path':test_df['image_path'].values, 
                           'predict_price':np.zeros([len(test_loader.dataset)])})
print('test_size:', len(test_loader.dataset))

for i in range(n_folds):
    train_df = image_df.loc[(image_df['fold']!=i)]
    val_df = image_df.loc[(image_df['fold']==i)]
    
    #dataset
    dataset_train = ImagesWithPaths(train_df['image_path'].values, 
                                    train_df['price'].values, 
                                    transform=transforms.ToTensor()) 
    train_loader = DataLoader(dataset_train, batch_size=64, shuffle=False, num_workers=8)
    
    #dataloader
    dataset_val = ImagesWithPaths(val_df['image_path'].values, 
                                  val_df['price'].values, 
                                  transform=transforms.ToTensor()) 
    val_loader = DataLoader(dataset_val, batch_size=64, shuffle=False, num_workers=8)
    
    print("fold %d, train size: %d, val size: %d"%(i+1, len(train_loader.dataset), len(val_loader.dataset)))
    
    #model, optimizer, loss and training
    model = Net()
    optimizer = optim.Adam(model.parameters())
    criterion = nn.MSELoss()
    train(train_loader, val_loader, model, epoch=20, optimizer=optimizer, criterion=criterion)
    
    #predict on validation set
    predictions = predict(val_loader, model)
    oof_preds = oof_preds.append(pd.DataFrame({'image_path':val_df['image_path'].values, 
                                               'predict_price':predictions.numpy()}))
    #predict on test set
    predictions = predict(test_loader, model)
    test_preds['predict_price'] += predictions.numpy()

test_size: 1282
fold 1, train size: 5298, val size: 1026


83it [02:01,  1.46s/it]


epoch 0, loss 1427.891165


83it [02:01,  1.47s/it]


epoch 1, loss 1395.720558


83it [02:01,  1.47s/it]


epoch 2, loss 1307.898188


83it [02:01,  1.47s/it]


epoch 3, loss 1215.874073


83it [02:01,  1.47s/it]


epoch 4, loss 1208.373418


83it [02:01,  1.47s/it]


epoch 5, loss 1209.155855


83it [02:01,  1.47s/it]


epoch 6, loss 1205.544970


83it [02:02,  1.47s/it]


epoch 7, loss 1198.707761


83it [02:01,  1.46s/it]


epoch 8, loss 1206.425349


83it [02:02,  1.47s/it]


epoch 9, loss 1198.434764


83it [02:01,  1.47s/it]


epoch 10, loss 1186.935790


83it [02:02,  1.47s/it]


epoch 11, loss 1194.266859


83it [02:01,  1.47s/it]


epoch 12, loss 1187.170120


83it [02:01,  1.47s/it]


epoch 13, loss 1185.395041


83it [02:01,  1.46s/it]


epoch 14, loss 1190.747208


83it [02:02,  1.47s/it]


epoch 15, loss 1183.340459


83it [02:01,  1.46s/it]


epoch 16, loss 1202.422673


83it [02:02,  1.47s/it]


epoch 17, loss 1193.135762


83it [02:02,  1.48s/it]


epoch 18, loss 1181.878339


83it [02:02,  1.48s/it]


epoch 19, loss 1190.131306
load best weights from epoch 18


17it [00:13,  1.30it/s]
21it [00:16,  1.30it/s]

fold 2, train size: 5320, val size: 1004



84it [02:02,  1.46s/it]


epoch 0, loss 1377.415144


84it [02:03,  1.46s/it]


epoch 1, loss 1440.801821


84it [02:02,  1.46s/it]


epoch 2, loss 1396.740504


84it [02:03,  1.47s/it]


epoch 3, loss 1347.266137


84it [02:03,  1.47s/it]


epoch 4, loss 1291.657711


84it [02:03,  1.47s/it]


epoch 5, loss 1326.198814


84it [02:03,  1.47s/it]


epoch 6, loss 1232.905001


84it [02:04,  1.48s/it]


epoch 7, loss 1218.068780


84it [02:04,  1.48s/it]


epoch 8, loss 1214.924497


84it [02:04,  1.48s/it]


epoch 9, loss 1213.428562


84it [02:04,  1.48s/it]


epoch 10, loss 1213.724607


84it [02:04,  1.48s/it]


epoch 11, loss 1215.423198


84it [02:04,  1.48s/it]


epoch 12, loss 1224.485352


84it [02:04,  1.48s/it]


epoch 13, loss 1223.531747
load best weights from epoch 9


16it [00:12,  1.24it/s]
21it [00:16,  1.28it/s]

fold 3, train size: 5324, val size: 1000



84it [02:01,  1.45s/it]


epoch 0, loss 1531.923148


84it [02:01,  1.45s/it]


epoch 1, loss 1485.431055


84it [02:02,  1.46s/it]


epoch 2, loss 1384.011502


84it [02:01,  1.45s/it]


epoch 3, loss 1388.313506


84it [02:02,  1.45s/it]


epoch 4, loss 1341.704906


84it [02:02,  1.46s/it]


epoch 5, loss 1357.664564


84it [02:01,  1.45s/it]


epoch 6, loss 1314.443179


84it [02:02,  1.45s/it]


epoch 7, loss 1307.203820


84it [02:02,  1.45s/it]


epoch 8, loss 1304.157524


84it [02:02,  1.45s/it]


epoch 9, loss 1299.830543


84it [02:01,  1.45s/it]


epoch 10, loss 1349.562597


84it [02:01,  1.45s/it]


epoch 11, loss 1302.341422


84it [02:01,  1.45s/it]


epoch 12, loss 1292.239661


84it [02:01,  1.45s/it]


epoch 13, loss 1326.883739


84it [02:01,  1.45s/it]


epoch 14, loss 1314.537301


84it [02:02,  1.45s/it]


epoch 15, loss 1288.219016


84it [02:01,  1.45s/it]


epoch 16, loss 1283.211544


84it [02:02,  1.46s/it]


epoch 17, loss 1280.029984


84it [02:02,  1.45s/it]


epoch 18, loss 1282.739866


84it [02:02,  1.45s/it]


epoch 19, loss 1285.579555
load best weights from epoch 17


16it [00:12,  1.29it/s]
21it [00:15,  1.32it/s]

fold 4, train size: 5325, val size: 999



84it [02:02,  1.46s/it]


epoch 0, loss 1306.268710


84it [02:02,  1.45s/it]


epoch 1, loss 1242.975226


84it [02:03,  1.46s/it]


epoch 2, loss 1205.654553


84it [02:02,  1.46s/it]


epoch 3, loss 1175.085718


84it [02:02,  1.46s/it]


epoch 4, loss 1154.717341


84it [02:02,  1.46s/it]


epoch 5, loss 1144.386506


84it [02:02,  1.46s/it]


epoch 6, loss 1149.005230


84it [02:02,  1.46s/it]


epoch 7, loss 1119.094392


84it [02:03,  1.46s/it]


epoch 8, loss 1147.674136


84it [02:02,  1.46s/it]


epoch 9, loss 1126.588417


84it [02:02,  1.46s/it]


epoch 10, loss 1126.274168


84it [02:02,  1.46s/it]


epoch 11, loss 1125.208251
load best weights from epoch 7


16it [00:12,  1.28it/s]
21it [00:15,  1.31it/s]

fold 5, train size: 5311, val size: 1013



83it [02:02,  1.48s/it]


epoch 0, loss 1464.261156


83it [02:02,  1.48s/it]


epoch 1, loss 1416.442417


83it [02:03,  1.48s/it]


epoch 2, loss 1329.358447


83it [02:02,  1.48s/it]


epoch 3, loss 1251.269454


83it [02:02,  1.48s/it]


epoch 4, loss 1241.233798


83it [02:03,  1.48s/it]


epoch 5, loss 1240.503068


83it [02:02,  1.48s/it]


epoch 6, loss 1265.342350


83it [02:02,  1.48s/it]


epoch 7, loss 1255.099732


83it [02:03,  1.48s/it]


epoch 8, loss 1242.932553


83it [02:02,  1.48s/it]


epoch 9, loss 1245.694845
load best weights from epoch 5


16it [00:12,  1.25it/s]
21it [00:16,  1.30it/s]


#### Save predictions to disk

In [14]:
test_preds['predict_price'] /= n_folds
preds = oof_preds.append(test_preds)
preds['image_path'] = preds['image_path'].apply(lambda x: x[len('../data_cleaning/'):])
preds = preds.set_index('image_path')
df['predicted_price'] = df['image_path'].map(preds['predict_price'])
df.to_csv('image_prediction.csv', index=False)
df.head()

Unnamed: 0,product_names,brand,price,image_path,islogo,fold,predicted_price
0,Clear Complexion Spot Treatment,Merle Norman,20.0,images/skin_care/Clear-Complexion-Spot-Treatme...,0.963436,2,
1,Acne Solutions Emergency Gel Lotion,Clinique,17.0,images/skin_care/Acne-Solutions-Emergency-Gel-...,0.01118,0,26.361757
2,RESIST Daily Pore-Refining Solution 2% BHA,Paula's Choice Skincare,33.0,images/skin_care/RESIST-Daily-Pore-Refining-So...,0.567055,-1,
3,Max Complexion Correction Pads,Peter Thomas Roth,40.0,images/skin_care/Max-Complexion-Correction-Pad...,0.005085,-1,26.56745
4,Naturals Acne Spot Treatment,Neutrogena,8.49,images/skin_care/Naturals-Acne-Spot-Treatment_...,0.000562,1,22.316217
