In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
df = pd.read_csv("age_gender.csv")
df.head()

Unnamed: 0,age,ethnicity,gender,img_name,pixels
0,1,2,0,20161219203650636.jpg.chip.jpg,129 128 128 126 127 130 133 135 139 142 145 14...
1,1,2,0,20161219222752047.jpg.chip.jpg,164 74 111 168 169 171 175 182 184 188 193 199...
2,1,2,0,20161219222832191.jpg.chip.jpg,67 70 71 70 69 67 70 79 90 103 116 132 145 155...
3,1,2,0,20161220144911423.jpg.chip.jpg,193 197 198 200 199 200 202 203 204 205 208 21...
4,1,2,0,20161220144914327.jpg.chip.jpg,202 205 209 210 209 209 210 211 212 214 218 21...


In [4]:
#Checking for NA values
df.isna().sum()

age          0
ethnicity    0
gender       0
img_name     0
pixels       0
dtype: int64

In [5]:
Y = df['ethnicity']
Y.nunique()

5

In [6]:
#Getting dimensions of pixels
n_features = len(df.pixels[0].split())
print(len(df.pixels), n_features)


23705 2304


In [7]:
#Spliting String into respective floats
X=np.zeros(shape=(23705,2304))

for i in range(len(df.pixels)):
    a=np.array(df.pixels[i].split(),dtype='float32')
    X[i]=a

In [8]:
X

array([[129., 128., 128., ..., 146., 146., 146.],
       [164.,  74., 111., ..., 182., 170., 148.],
       [ 67.,  70.,  71., ..., 112., 111., 108.],
       ...,
       [ 59.,  50.,  37., ...,  98.,  78.,  78.],
       [ 45., 108., 120., ...,  32.,  35.,  35.],
       [156., 161., 160., ..., 190., 184., 174.]])

In [9]:
X = X.reshape(-1,48,48)

In [10]:
X.shape

(23705, 48, 48)

In [11]:
n = len(X)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42)

In [13]:
class ImageDataTrain(Dataset):
    def __init__(self):
        self.x = torch.unsqueeze(torch.from_numpy(X_train).float(),1)
        self.y = torch.unsqueeze(torch.Tensor(y_train.values).float(),1)
        self.n_samples = len(y_train)
    def __getitem__(self,index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.n_samples

dataset_tr = ImageDataTrain()    
train_loader = DataLoader(dataset=dataset_tr, batch_size=50, shuffle=True, num_workers=0)

train_iter = iter(train_loader)
train_data = train_iter.next()

In [31]:
class ImageDataTest(Dataset):
    def __init__(self):
        self.x = torch.unsqueeze(torch.from_numpy(X_test).float(),1)
        self.y = torch.unsqueeze(torch.Tensor(y_test.values).float(),1)
        self.n_samples = len(y_test)
    def __getitem__(self,index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.n_samples

dataset_te = ImageDataTest()    
test_loader = DataLoader(dataset=dataset_te, batch_size=500, shuffle=True, num_workers=0)

test_iter = iter(test_loader)
test_data = test_iter.next()
test_features, test_labels = test_data

In [20]:
class Ethinicity(nn.Module):
    def __init__(self):
        super(Ethinicity,self).__init__()
        
        self.conv_layers = nn.Sequential(
                        #Conv Layer 1
                        nn.Conv2d(in_channels=1,out_channels=6, kernel_size=3,stride=1,padding=1),
                        nn.ReLU(inplace=True),
                        nn.BatchNorm2d(4),
                        nn.MaxPool2d(kernel_size=2, stride=2),
                        #Conv Layer 2
                        nn.Conv2d(in_channels=6,out_channels=6, kernel_size=3,stride=1,padding=1),
                        nn.ReLU(inplace=True),
                        nn.BatchNorm2d(4),
                        nn.MaxPool2d(kernel_size=2, stride=2))
        
        
        self.linear_layers = nn.Sequential(
                        nn.Linear(6*12*12,5))
        
    
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0),-1)
        x = self.linear_layers(x)


In [16]:
net = Ethinicity()
print(net)

Ethinicity(
  (conv_layers): Sequential(
    (0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=864, out_features=5, bias=True)
  )
)


In [17]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)


In [22]:
num_epochs = 5

In [23]:
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
                
        # Forward pass
        outputs = net(images)
        loss = criterion(outputs, labels.squeeze().long())
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 5 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/5], Step [5/318], Loss: 1.5147
Epoch [1/5], Step [10/318], Loss: 1.5276
Epoch [1/5], Step [15/318], Loss: 1.5080
Epoch [1/5], Step [20/318], Loss: 1.5125
Epoch [1/5], Step [25/318], Loss: 1.4262
Epoch [1/5], Step [30/318], Loss: 1.4024
Epoch [1/5], Step [35/318], Loss: 1.4671
Epoch [1/5], Step [40/318], Loss: 1.4667
Epoch [1/5], Step [45/318], Loss: 1.4265
Epoch [1/5], Step [50/318], Loss: 1.4192
Epoch [1/5], Step [55/318], Loss: 1.2973
Epoch [1/5], Step [60/318], Loss: 1.1752
Epoch [1/5], Step [65/318], Loss: 1.4941
Epoch [1/5], Step [70/318], Loss: 1.4626
Epoch [1/5], Step [75/318], Loss: 1.3859
Epoch [1/5], Step [80/318], Loss: 1.4431
Epoch [1/5], Step [85/318], Loss: 1.5296
Epoch [1/5], Step [90/318], Loss: 1.4783
Epoch [1/5], Step [95/318], Loss: 1.3422
Epoch [1/5], Step [100/318], Loss: 1.2689
Epoch [1/5], Step [105/318], Loss: 1.4618
Epoch [1/5], Step [110/318], Loss: 1.3977
Epoch [1/5], Step [115/318], Loss: 1.5105
Epoch [1/5], Step [120/318], Loss: 1.3390
Epoch [1/5],

Epoch [4/5], Step [45/318], Loss: 1.4618
Epoch [4/5], Step [50/318], Loss: 1.4891
Epoch [4/5], Step [55/318], Loss: 1.5280
Epoch [4/5], Step [60/318], Loss: 1.4736
Epoch [4/5], Step [65/318], Loss: 1.3125
Epoch [4/5], Step [70/318], Loss: 1.4165
Epoch [4/5], Step [75/318], Loss: 1.4211
Epoch [4/5], Step [80/318], Loss: 1.4894
Epoch [4/5], Step [85/318], Loss: 1.5298
Epoch [4/5], Step [90/318], Loss: 1.3425
Epoch [4/5], Step [95/318], Loss: 1.4213
Epoch [4/5], Step [100/318], Loss: 1.4099
Epoch [4/5], Step [105/318], Loss: 1.6544
Epoch [4/5], Step [110/318], Loss: 1.6017
Epoch [4/5], Step [115/318], Loss: 1.4592
Epoch [4/5], Step [120/318], Loss: 1.3817
Epoch [4/5], Step [125/318], Loss: 1.3585
Epoch [4/5], Step [130/318], Loss: 1.4592
Epoch [4/5], Step [135/318], Loss: 1.4933
Epoch [4/5], Step [140/318], Loss: 1.4230
Epoch [4/5], Step [145/318], Loss: 1.3615
Epoch [4/5], Step [150/318], Loss: 1.4615
Epoch [4/5], Step [155/318], Loss: 1.3660
Epoch [4/5], Step [160/318], Loss: 1.4865
Epo

In [73]:
net.eval()
pred_labels = net(test_features)

In [79]:
# Save the model checkpoint
torch.save(net.state_dict(), 'model.ckpt')