In [3]:
import os 
 
# Set the CUDA_VISIBLE_DEVICES environment variable to the desired GPU ID 
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # Replace 1 with the GPU ID you want to use 
 
# Now, your Python code will run on the specified GPU 
# For example: 
import torch 
 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
print(f"Running on device: {device}") 
 

Running on device: cuda


## Importing Libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

 ### Protected attributes

These are extracted from the images using the deepface api

In [5]:
annotations = pd.read_csv('label_annotations.csv')    # reading the csv file
annotations.head() 

Unnamed: 0,010354.jpg,middle eastern,Man,neutral
0,009680.jpg,latino hispanic,Woman,happy
1,009028.jpg,white,Woman,happy
2,007702.jpg,white,Woman,sad
3,009681.jpg,white,Man,happy
4,010355.jpg,white,Woman,sad


In [6]:
annotations.columns = ["image_id", "race", "gender", "emotion"]

In [7]:
annotations

Unnamed: 0,image_id,race,gender,emotion
0,009680.jpg,latino hispanic,Woman,happy
1,009028.jpg,white,Woman,happy
2,007702.jpg,white,Woman,sad
3,009681.jpg,white,Man,happy
4,010355.jpg,white,Woman,sad
...,...,...,...,...
10405,000102.jpg,white,Man,fear
10406,000103.jpg,white,Woman,neutral
10407,000104.jpg,white,Man,happy
10408,000105.jpg,white,Man,neutral


 ### Non-protected attributes

These are other attributes of the image dataset that describes the image

In [8]:
other_attributes = pd.read_csv('list_attr_celeba.csv')    # reading the csv file
other_attributes.head() 

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
2,000003.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,1,-1,-1,-1,-1,-1,1
3,000004.jpg,-1,-1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,1,-1,1,1,-1,1
4,000005.jpg,-1,1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1


## Combining protected and non-protected attributes

In [12]:
import pandas as pd

# Assuming you have two DataFrames: 'df1' and 'df2' with a column named 'image_id' that you want to use for merging.

# Perform an inner join based on the 'image_id' column
merged_df = pd.merge(annotations, other_attributes, on='image_id')

# Print the merged DataFrame
print(merged_df)


         image_id             race gender  emotion  5_o_Clock_Shadow  \
0      009680.jpg  latino hispanic  Woman    happy                -1   
1      009028.jpg            white  Woman    happy                -1   
2      007702.jpg            white  Woman      sad                -1   
3      009681.jpg            white    Man    happy                -1   
4      010355.jpg            white  Woman      sad                -1   
...           ...              ...    ...      ...               ...   
10405  000102.jpg            white    Man     fear                -1   
10406  000103.jpg            white  Woman  neutral                -1   
10407  000104.jpg            white    Man    happy                -1   
10408  000105.jpg            white    Man  neutral                -1   
10409  000106.jpg            white  Woman    happy                -1   

       Arched_Eyebrows  Attractive  Bags_Under_Eyes  Bald  Bangs  ...  \
0                    1          -1               -1    -1     

In [13]:
# Select the required columns
selected_df = merged_df[['image_id', 'race', 'gender', 'emotion', 'Male']]

#selected_df.rename(columns={'male': 'gender'}, inplace=True)
selected_df

Unnamed: 0,image_id,race,gender,emotion,Male
0,009680.jpg,latino hispanic,Woman,happy,-1
1,009028.jpg,white,Woman,happy,-1
2,007702.jpg,white,Woman,sad,-1
3,009681.jpg,white,Man,happy,1
4,010355.jpg,white,Woman,sad,-1
...,...,...,...,...,...
10405,000102.jpg,white,Man,fear,1
10406,000103.jpg,white,Woman,neutral,-1
10407,000104.jpg,white,Man,happy,1
10408,000105.jpg,white,Man,neutral,1


In [14]:
selected_df

Unnamed: 0,image_id,race,gender,emotion,Male
0,009680.jpg,latino hispanic,Woman,happy,-1
1,009028.jpg,white,Woman,happy,-1
2,007702.jpg,white,Woman,sad,-1
3,009681.jpg,white,Man,happy,1
4,010355.jpg,white,Woman,sad,-1
...,...,...,...,...,...
10405,000102.jpg,white,Man,fear,1
10406,000103.jpg,white,Woman,neutral,-1
10407,000104.jpg,white,Man,happy,1
10408,000105.jpg,white,Man,neutral,1


In [15]:

# Display the modified DataFrame
# Compare 'gender' and 'gender_predicted' columns
accuracy = (selected_df['gender'] == selected_df['Male'].map({1: 'Man', -1: 'Woman'})).mean()

# Report accuracy
print("Accuracy:", accuracy)


Accuracy: 0.9237271853986552


The gender has been taken from the deep face api. Just to check how well the annotations are done, i compared it with true gender which is human annotated. This shows that the annotations for the attribute gender are 92% correct.

Discarding the annotated gender attribute and keeping only the human labeled from celebA

In [16]:
# Replace values in 'gender' column
selected_df['gender'] = selected_df['Male'].map({1: 'Man', -1: 'Woman'})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_df['gender'] = selected_df['Male'].map({1: 'Man', -1: 'Woman'})


In [17]:
merged_df

Unnamed: 0,image_id,race,gender,emotion,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,009680.jpg,latino hispanic,Woman,happy,-1,1,-1,-1,-1,1,...,-1,1,-1,1,1,-1,1,-1,-1,-1
1,009028.jpg,white,Woman,happy,-1,-1,-1,-1,-1,-1,...,-1,1,-1,1,1,-1,1,-1,-1,1
2,007702.jpg,white,Woman,sad,-1,1,-1,-1,-1,1,...,-1,-1,-1,-1,1,-1,1,-1,-1,-1
3,009681.jpg,white,Man,happy,-1,-1,-1,-1,-1,-1,...,-1,1,-1,1,-1,-1,-1,-1,1,-1
4,010355.jpg,white,Woman,sad,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,-1,-1,-1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10405,000102.jpg,white,Man,fear,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
10406,000103.jpg,white,Woman,neutral,-1,1,1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,1,-1,1
10407,000104.jpg,white,Man,happy,-1,-1,1,1,-1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,1,1
10408,000105.jpg,white,Man,neutral,-1,-1,-1,-1,-1,1,...,-1,-1,1,-1,-1,-1,-1,-1,-1,1


In [18]:
try:
    # Drop the 'male' column
    selected_df = selected_df.drop('Male', axis=1)
except:
    # Display the updated DataFrame
    print("Column already dropped\nThe new dataframe:\n", selected_df)


In [19]:
try:

    merged_df = merged_df.drop(['Male','gender','emotion','race', 'Young'], axis=1)

except:
    # Display the updated DataFrame
    print("Column already dropped\nThe new dataframe:\n", merged_df)


In [20]:
merged_df.columns

Index(['image_id', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive',
       'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose',
       'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair', 'Bushy_Eyebrows',
       'Chubby', 'Double_Chin', 'Eyeglasses', 'Goatee', 'Gray_Hair',
       'Heavy_Makeup', 'High_Cheekbones', 'Mouth_Slightly_Open', 'Mustache',
       'Narrow_Eyes', 'No_Beard', 'Oval_Face', 'Pale_Skin', 'Pointy_Nose',
       'Receding_Hairline', 'Rosy_Cheeks', 'Sideburns', 'Smiling',
       'Straight_Hair', 'Wavy_Hair', 'Wearing_Earrings', 'Wearing_Hat',
       'Wearing_Lipstick', 'Wearing_Necklace', 'Wearing_Necktie'],
      dtype='object')

In [24]:
# Save the DataFrame to a new CSV file
selected_df.to_csv('protected_attributes.csv', index=False)
merged_df.to_csv('non-protected_attributes.csv', index=False)
