In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
df = pd.read_csv("age_gender.csv")
df.head()

Unnamed: 0,age,ethnicity,gender,img_name,pixels
0,1,2,0,20161219203650636.jpg.chip.jpg,129 128 128 126 127 130 133 135 139 142 145 14...
1,1,2,0,20161219222752047.jpg.chip.jpg,164 74 111 168 169 171 175 182 184 188 193 199...
2,1,2,0,20161219222832191.jpg.chip.jpg,67 70 71 70 69 67 70 79 90 103 116 132 145 155...
3,1,2,0,20161220144911423.jpg.chip.jpg,193 197 198 200 199 200 202 203 204 205 208 21...
4,1,2,0,20161220144914327.jpg.chip.jpg,202 205 209 210 209 209 210 211 212 214 218 21...


In [3]:
#Checking for NA values
df.isna().sum()

age          0
ethnicity    0
gender       0
img_name     0
pixels       0
dtype: int64

In [4]:
Y1 = df['age']
Y1.nunique()
Y1

0         1
1         1
2         1
3         1
4         1
         ..
23700    99
23701    99
23702    99
23703    99
23704    99
Name: age, Length: 23705, dtype: int64

In [5]:
Y2 = df['ethnicity']
Y2.nunique()

5

In [6]:
Y3 = df['gender']
Y3.nunique()

2

In [7]:
#Getting dimensions of pixels
n_features = len(df.pixels[0].split())
print(len(df.pixels), n_features)


23705 2304


In [8]:
#Spliting String into respective floats
X=np.zeros(shape=(23705,2304))

for i in range(len(df.pixels)):
    a=np.array(df.pixels[i].split(),dtype='float32')
    X[i]=a

In [9]:
#Feature Scaling and Standardization
scaler = StandardScaler()
X = scaler.fit_transform(X)
X = pd.DataFrame(X)
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2294,2295,2296,2297,2298,2299,2300,2301,2302,2303
0,0.552527,0.595723,0.630926,0.608607,0.603081,0.59833,0.573887,0.513939,0.477998,0.43044,...,0.284915,0.361761,0.418779,0.464353,0.506114,0.518489,0.501318,0.517149,0.509225,0.497893
1,1.04843,-0.195187,0.374738,1.25843,1.266118,1.25545,1.257678,1.289544,1.230453,1.211948,...,0.953061,1.236372,1.691311,1.91579,1.811414,1.72312,1.312273,0.997706,0.827259,0.524186
2,-0.32593,-0.253773,-0.228056,-0.257824,-0.312541,-0.411391,-0.451799,-0.410187,-0.341342,-0.232143,...,0.195829,0.128532,0.161413,0.154337,0.158959,0.148886,0.109357,0.06329,0.045425,-0.00167
3,1.459321,1.60633,1.685816,1.753534,1.739716,1.720242,1.697258,1.636091,1.564878,1.500766,...,1.695445,1.717408,1.748504,1.732599,1.700325,1.695742,1.677203,1.705193,1.741608,1.812533
4,1.586839,1.723502,1.851584,1.908253,1.897582,1.864488,1.827504,1.768109,1.698648,1.65367,...,0.908518,0.915681,0.890617,0.87301,0.867154,0.888092,0.866248,0.797474,0.734499,0.695089


In [10]:
#Test train split
X1_train, X1_test, y1_train, y1_test = train_test_split(X, Y1, test_size=0.3, random_state=42)
X2_train, X2_test, y2_train, y2_test = train_test_split(X, Y2, test_size=0.3, random_state=42)
X3_train, X3_test, y3_train, y3_test = train_test_split(X, Y3, test_size=0.3, random_state=42)

#Converting dataframes to tensor for pytorch
X3_train = torch.from_numpy(X3_train.to_numpy()).float()
y3_train = torch.squeeze(torch.from_numpy(y3_train.to_numpy()).float())
X3_test = torch.from_numpy(X3_test.to_numpy()).float()
y3_test = torch.squeeze(torch.from_numpy(y3_test.to_numpy()).float())

print(X3_train.shape, y3_train.shape)
print(X3_test.shape, y3_test.shape)

torch.Size([16593, 2304]) torch.Size([16593])
torch.Size([7112, 2304]) torch.Size([7112])


In [11]:
#Defining Neural Network for Gender
class Neural_Net_Gender(nn.Module):
    def __init__(self):
        super(Neural_Net_Gender, self).__init__()
        self.fc1 = nn.Linear(2304, 64)
        self.fc2 = nn.Linear(64, 1)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x
net = Neural_Net_Gender()
print(net)

loss_function = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=0.1)

Neural_Net_Gender(
  (fc1): Linear(in_features=2304, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)


In [12]:
y3_train

tensor([1., 1., 1.,  ..., 1., 0., 1.])

In [13]:
#Use if you want to use GPU
'''
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

X3_train = X3_train.to(device)
y3_train = y3_train.to(device)

X3_test = X3_test.to(device)
y3_test = y3_test.to(device)

net = net.to(device)

loss_function = loss_function.to(device)'''

'\ndevice = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")\n\nX3_train = X3_train.to(device)\ny3_train = y3_train.to(device)\n\nX3_test = X3_test.to(device)\ny3_test = y3_test.to(device)\n\nnet = net.to(device)\n\nloss_function = loss_function.to(device)'

In [14]:
#Accuracy Functions
def calculate_accuracy(y_true, y_pred):
    predicted = y_pred.ge(.5).view(-1)
    return (y_true == predicted).sum().float() / len(y_true)
def round_tensor(t, decimal_places=3):
    return round(t.item(), decimal_places)

In [15]:
#Model Training for Gender
for i in range(1000):
    y_pred = net(X3_train)
    y_pred = torch.squeeze(y_pred)
    train_loss = loss_function(y_pred, y3_train)
    
    if i % 100 == 0:
        train_acc = calculate_accuracy(y3_train, y_pred)

        y_test_pred = net(X3_test)
        y_test_pred = torch.squeeze(y_test_pred)

        test_loss = loss_function(y_test_pred, y3_test)

        test_acc = calculate_accuracy(y3_test, y_test_pred)
        print(
            f'''epoch {i}
            Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
            Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
            ''')
    optimizer.zero_grad()
    
    train_loss.backward()
    
    optimizer.step()

epoch 0
            Train set - loss: 0.707, accuracy: 0.476
            Test  set - loss: 0.706, accuracy: 0.477
            
epoch 100
            Train set - loss: 4.834, accuracy: 0.826
            Test  set - loss: 4.957, accuracy: 0.823
            
epoch 200
            Train set - loss: 3.949, accuracy: 0.858
            Test  set - loss: 4.364, accuracy: 0.844
            
epoch 300
            Train set - loss: 3.96, accuracy: 0.857
            Test  set - loss: 4.322, accuracy: 0.844
            
epoch 400
            Train set - loss: 3.615, accuracy: 0.87
            Test  set - loss: 4.175, accuracy: 0.849
            
epoch 500
            Train set - loss: 3.084, accuracy: 0.889
            Test  set - loss: 3.833, accuracy: 0.861
            
epoch 600
            Train set - loss: 3.15, accuracy: 0.886
            Test  set - loss: 3.957, accuracy: 0.857
            
epoch 700
            Train set - loss: 2.784, accuracy: 0.899
            Test  set - loss: 3.824, ac

In [16]:
#Saving the Model
MODEL_PATH = 'model.pth'
torch.save(net, MODEL_PATH)



In [17]:
#Reloading the Model
net = torch.load(MODEL_PATH)

In [19]:
#Results for gender
y_pred = net(X3_test)
y_pred = y_pred.ge(.5).view(-1).cpu()
y3_test = y3_test.cpu()

print(classification_report(y3_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.92      0.79      0.85      3741
         1.0       0.80      0.92      0.86      3371

    accuracy                           0.85      7112
   macro avg       0.86      0.86      0.85      7112
weighted avg       0.86      0.85      0.85      7112

