In [11]:
import os
import warnings

import cv2
import numpy as np
import pandas as pd

## Function that loads images from files and writes them into a data frame with corresponding metadata

In [3]:
def images_to_tables(path, categories, source):
    """Description:

        Function that takes images from different categories and puts them into a DataFrame
        with their metadata (height, width, etc.). The decoded pixel arrays are stored in the
        DataFrame as well.

        Parameters:
            - path - main directory where images are stored
            - categories - names of subfolders (one per emotion) located inside `path`
            - source - identifier of the place the pictures were taken from; it is stored
              unchanged in the `source_id` column

        Returns:
            - data frame with one row per readable image and the columns `source_id`,
              `emotion_id`, `height`, `width`, `aspect_ratio`, `image_format`, `color_space`,
              `file_size_MB` and `pixels`. The frame is empty (but has these columns) when no
              image was found.

        Notes:
            - Entries that OpenCV cannot decode (non-image files, nested folders, corrupted
              images) are skipped with a warning instead of aborting the whole load.
            - Files are processed in sorted order so the row index is reproducible.
            - `color_space` holds the number of channels of the decoded array, or 'Grayscale'
              for a 2-D array. `cv2.imread` is called with its default flag, which normally
              yields 3-channel arrays even for grayscale files (see the OpenCV documentation).
            - Category names missing from the internal emotion map get NaN as `emotion_id`.

    """

    columns = [
        "source_id", "emotion_id", "height", "width", "aspect_ratio",
        "image_format", "color_space", "file_size_MB", "pixels",
    ]

    # Ids follow the `emotions_ids` lookup table of this notebook.
    emotion_map = {'Anger' : 0, 'Disgust' : 1, 'Fear' : 2, 'Happiness' : 3, 'Sadness' : 4,
                   'Surprise' : 5, 'Neutral' : 6, 'Contempt' : 7}

    data = []

    for cat in categories:
        # Keep `path` untouched: every category folder is resolved against the same root.
        category_dir = os.path.join(path, cat)

        for file_name in sorted(os.listdir(category_dir)):
            image_path = os.path.join(category_dir, file_name)

            img_array = cv2.imread(image_path)
            if img_array is None:
                # imread signals failure by returning None rather than raising.
                warnings.warn(f"Skipping unreadable image file: {image_path}", stacklevel=2)
                continue

            height, width = img_array.shape[:2]
            image_size_mb = os.path.getsize(image_path) / (1024 * 1024)

            data.append({
                "source_id" : source,
                "emotion_id" : cat,
                "height" : height,
                "width" : width,
                "aspect_ratio" : width / height,
                # splitext only looks at the file name, so dots in folder names are harmless.
                "image_format" : os.path.splitext(file_name)[1].lstrip('.').upper(),
                "color_space" : img_array.shape[2] if img_array.ndim == 3 else 'Grayscale',
                "file_size_MB" : image_size_mb,
                "pixels" : img_array
            })

    # Passing `columns` keeps the schema stable even when `data` is empty.
    df = pd.DataFrame(data, columns=columns)
    df['emotion_id'] = df['emotion_id'].map(emotion_map)

    return df


## Create tables with 'emotions' and 'source' ids

In [12]:
# Reference link for every image source.
pixabay_API = 'https://pixabay.com/service/about/api/'
ddg_API = 'https://rapidapi.com/epctex-epctex-default/api/duckduckgo10'
kaggle_url = 'https://www.kaggle.com/datasets/sudarshanvaidya/random-images-for-face-emotion-recognition?select=anger'
FER_2013 = 'https://www.kaggle.com/datasets/msambare/fer2013'

# Free-text description of how the images of each source were obtained.
pixabay_comment = 'Images downloaded using API. All images are in color and exhibit varying resolutions; sotred in JPG format. '
ddg_comment = 'Images downloaded using API. All images are in color and exhibit varying resolutions; sotred in JPG format. '
kaggle_comment = 'Randomly selected images from data base available at Kaggle. Each image is 224 x 224 pixel grayscale in PNG format.'
FER_2013_comment = 'Randomly selected images from FER-2013 data base. Each image is 48x48 pixel grayscale in JPG format.'

# Lookup table of sources, one row per source: (id, source, url_link, comments).
sources = pd.DataFrame(
    [
        (1, 'Pixabay API', pixabay_API, pixabay_comment),
        (2, 'DuckDuckGo API', ddg_API, ddg_comment),
        (3, 'Kaggle', kaggle_url, kaggle_comment),
        (4, 'FER-2013', FER_2013, FER_2013_comment),
    ],
    columns=['id', 'source', 'url_link', 'comments'],
)

In [5]:
sources

Unnamed: 0,id,source,url_link,comments
0,1,Pixabay API,https://pixabay.com/service/about/api/,Images downloaded using API. All images are in...
1,2,DuckDuckGo API,https://rapidapi.com/epctex-epctex-default/api...,Images downloaded using API. All images are in...
2,3,Kaggle,https://www.kaggle.com/datasets/sudarshanvaidy...,Randomly selected images from data base availa...
3,4,FER-2013,https://www.kaggle.com/datasets/msambare/fer2013,Randomly selected images from FER-2013 data ba...


In [6]:
# Lookup table of emotions: the id of each emotion is its position in the list below.
emotions_ids = pd.DataFrame(
    list(enumerate(
        ['Anger', 'Disgust', 'Fear', 'Happiness', 'Sadness', 'Surprise', 'Neutral', 'Contempt']
    )),
    columns=['id', 'emotion'],
)

In [9]:
emotions_ids

Unnamed: 0,id,emotion
0,0,Anger
1,1,Disgust
2,2,Fear
3,3,Happiness
4,4,Sadness
5,5,Surprise
6,6,Neutral
7,7,Contempt


## Select pictures from pixabay - after face detection

In [17]:
# Names of the emotion sub-folders to load. The list order only influences row order:
# images_to_tables assigns emotion_id by name through its internal emotion_map.
emotions = ['Anger', 'Disgust', 'Fear', 'Happiness', 'Neutral', 'Sadness', 'Surprise']

In [18]:
# Relative root folder of the Pixabay images; images_to_tables joins it with each
# category name, so it must contain one sub-folder per emotion.
pixabay_path = '1_pixabay_images/1_Face_Extraction/'  

In [19]:
# Load all Pixabay images with their metadata; source=1 is the id of 'Pixabay API'
# in the `sources` lookup table.
pixabay = images_to_tables(pixabay_path, categories=emotions, source=1)

In [26]:
pixabay

Unnamed: 0,source_id,emotion_id,height,width,aspect_ratio,image_format,color_space,file_size_MB,pixels
0,1,0,302,302,1.000000,JPG,RGB,0.022323,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
1,1,0,1218,1280,1.050903,JPG,RGB,0.439991,"[[[68, 68, 68], [73, 73, 73], [63, 63, 63], [8..."
2,1,0,620,620,1.000000,JPG,RGB,0.136570,"[[[231, 231, 231], [232, 232, 232], [233, 233,..."
3,1,0,410,410,1.000000,JPG,RGB,0.041554,"[[[36, 36, 36], [36, 36, 36], [36, 36, 36], [3..."
4,1,0,320,320,1.000000,JPG,RGB,0.078756,"[[[255, 255, 255], [126, 126, 126], [79, 79, 7..."
...,...,...,...,...,...,...,...,...,...
104,1,5,412,412,1.000000,JPG,RGB,0.051921,"[[[12, 2, 2], [12, 2, 2], [12, 2, 2], [11, 1, ..."
105,1,5,206,206,1.000000,JPG,RGB,0.019261,"[[[244, 240, 245], [245, 241, 246], [243, 240,..."
106,1,5,863,857,0.993048,JPG,RGB,0.248372,"[[[40, 40, 40], [39, 39, 39], [46, 46, 46], [5..."
107,1,5,629,626,0.995231,JPG,RGB,0.103067,"[[[252, 252, 252], [254, 254, 254], [255, 255,..."


In [20]:
# Turn the channel count stored by images_to_tables into a readable label.
# `replace` (unlike `map`) leaves every value that is not a key untouched, so the cell
# can be re-run safely and unexpected values are not silently turned into NaN.
pixabay['color_space'] = pixabay['color_space'].replace({3 : 'RGB'})

In [21]:
pixabay['emotion_id'].value_counts()

emotion_id
3    26
6    25
0    18
5    18
4    15
2     4
1     3
Name: count, dtype: int64

In [189]:
# Count images per emotion id, keeping NaN visible (dropna=False) so categories that
# images_to_tables could not map to an id are noticed. The column created by
# images_to_tables is `emotion_id`; the frame has no `emotion` column.
pixabay['emotion_id'].value_counts(dropna=False)

emotion
3    26
6    25
0    18
5    18
4    15
2     4
1     3
Name: count, dtype: int64

In [200]:
pixabay.describe()

Unnamed: 0,source_id,emotion_id,height,width,aspect_ratio,color_space,file_size_MB
count,109.0,109.0,109.0,109.0,109.0,109.0,109.0
mean,1.0,3.568807,498.972477,495.045872,0.99649,3.0,0.092099
std,0.0,2.05195,214.119492,204.756829,0.029937,0.0,0.069966
min,1.0,0.0,139.0,139.0,0.722656,3.0,0.009028
25%,1.0,3.0,334.0,333.0,1.0,3.0,0.034948
50%,1.0,4.0,484.0,484.0,1.0,3.0,0.079226
75%,1.0,5.0,626.0,626.0,1.0,3.0,0.132438
max,1.0,6.0,1280.0,1280.0,1.050903,3.0,0.439991


In [22]:
pixabay.dtypes

source_id         int64
emotion_id        int64
height            int64
width             int64
aspect_ratio    float64
image_format     object
color_space      object
file_size_MB    float64
pixels           object
dtype: object

In [23]:
pixabay['aspect_ratio'].value_counts()

aspect_ratio
1.000000    92
1.050903     1
0.993048     1
0.993954     1
0.993949     1
1.001792     1
1.002210     1
0.993377     1
1.006316     1
1.001894     1
0.998829     1
0.998004     1
0.997006     1
1.003717     1
0.995485     1
1.006098     1
0.864713     1
0.995231     1
Name: count, dtype: int64

In [24]:
pixabay['aspect_ratio'].min()

0.8647125140924464

In [25]:
pixabay['aspect_ratio'].max()

1.0509031198686372

In [126]:
# df = pd.read_csv('test_pixabay_set.csv', sep = ',')

## Pictures from DuckDuckGo after face detection

In [28]:
# Relative root folder of the DuckDuckGo images; images_to_tables joins it with each
# category name, so it must contain one sub-folder per emotion.
ddg_path = '2_duckduckgo_images/1_Face_Extraction/' 

In [29]:
# Load all DuckDuckGo images with their metadata; source=2 is the id of 'DuckDuckGo API'
# in the `sources` lookup table.
ddg = images_to_tables(ddg_path, categories=emotions, source=2)

In [30]:
# Turn the channel count stored by images_to_tables into a readable label.
# `replace` (unlike `map`) leaves every value that is not a key untouched, so the cell
# can be re-run safely and unexpected values are not silently turned into NaN.
ddg['color_space'] = ddg['color_space'].replace({3 : 'RGB'})

In [31]:
ddg['emotion_id'].value_counts()

emotion_id
5    80
3    79
1    63
0    62
6    58
2    55
4    51
Name: count, dtype: int64

In [211]:
ddg.shape

(448, 9)

In [243]:
ddg['aspect_ratio'].min(), ddg['aspect_ratio'].max()

(0.8986568986568987, 1.0364372469635628)

In [239]:
ddg['aspect_ratio'].value_counts()

aspect_ratio
1.000000    429
0.998473      1
0.898657      1
0.903955      1
0.998575      1
0.993410      1
0.905775      1
0.995546      1
1.005017      1
0.995918      1
0.995627      1
0.990964      1
1.036437      1
1.031000      1
1.014388      1
0.997093      1
1.003645      1
0.998206      1
0.995717      1
0.986861      1
Name: count, dtype: int64

## Selected pictures from Kaggle

In [33]:
# Relative root folder of the selected Kaggle images; images_to_tables joins it with each
# category name, so it must contain one sub-folder per emotion.
kaggle_path = '3_Kaggle_Natural_human_face_recognition_224x224px/1_Selected/' 

In [34]:
# Load all selected Kaggle images with their metadata; source=3 is the id of 'Kaggle'
# in the `sources` lookup table.
kaggle = images_to_tables(kaggle_path, categories=emotions, source=3)

In [35]:
# Label the images as grayscale (the source comment describes this set as grayscale)
# although images_to_tables recorded 3 channels for the decoded arrays.
# `replace` (unlike `map`) leaves every value that is not a key untouched, so the cell
# can be re-run safely and unexpected values are not silently turned into NaN.
kaggle['color_space'] = kaggle['color_space'].replace({3 : 'Grayscale'})

In [36]:
kaggle['emotion_id'].value_counts()

emotion_id
0    200
1    200
2    200
3    200
6    200
4    200
5    200
Name: count, dtype: int64

In [251]:
kaggle

Unnamed: 0,source_id,emotion_id,height,width,aspect_ratio,image_format,color_space,file_size_MB,pixels
0,3,0,224,224,1.0,PNG,Grayscale,0.006985,"[[[63, 63, 63], [63, 63, 63], [63, 63, 63], [8..."
1,3,0,224,224,1.0,PNG,Grayscale,0.007193,"[[[8, 8, 8], [8, 8, 8], [8, 8, 8], [6, 6, 6], ..."
2,3,0,224,224,1.0,PNG,Grayscale,0.010216,"[[[38, 38, 38], [38, 38, 38], [38, 38, 38], [3..."
3,3,0,224,224,1.0,PNG,Grayscale,0.006433,"[[[47, 47, 47], [47, 47, 47], [47, 47, 47], [4..."
4,3,0,224,224,1.0,PNG,Grayscale,0.005840,"[[[21, 21, 21], [21, 21, 21], [21, 21, 21], [2..."
...,...,...,...,...,...,...,...,...,...
695,3,5,224,224,1.0,PNG,Grayscale,0.003623,"[[[187, 187, 187], [187, 187, 187], [187, 187,..."
696,3,5,224,224,1.0,PNG,Grayscale,0.004206,"[[[183, 183, 183], [183, 183, 183], [183, 183,..."
697,3,5,224,224,1.0,PNG,Grayscale,0.011027,"[[[139, 139, 139], [139, 139, 139], [145, 145,..."
698,3,5,224,224,1.0,PNG,Grayscale,0.011926,"[[[161, 161, 161], [161, 161, 161], [82, 82, 8..."


## Selected images from FER-2013

In [38]:
# Relative root folder of the selected FER-2013 images; images_to_tables joins it with each
# category name, so it must contain one sub-folder per emotion.
FER2013_path = '4_FER-2013-Kaggle/1_Selected/' 

In [39]:
# Load all selected FER-2013 images with their metadata; source=4 is the id of 'FER-2013'
# in the `sources` lookup table.
FER2013 = images_to_tables(FER2013_path, categories=emotions, source=4)

In [40]:
# Label the images as grayscale (the source comment describes this set as grayscale)
# although images_to_tables recorded 3 channels for the decoded arrays.
# `replace` (unlike `map`) leaves every value that is not a key untouched, so the cell
# can be re-run safely and unexpected values are not silently turned into NaN.
FER2013['color_space'] = FER2013['color_space'].replace({3 : 'Grayscale'})

In [41]:
FER2013['emotion_id'].value_counts()

emotion_id
0    200
1    200
2    200
3    200
6    200
4    200
5    200
Name: count, dtype: int64

In [236]:
FER2013

Unnamed: 0,source_id,emotion_id,height,width,aspect_ratio,image_format,color_space,file_size_MB,pixels
0,4,0,48,48,1.0,JPG,3,0.001474,"[[[155, 155, 155], [138, 138, 138], [98, 98, 9..."
1,4,0,48,48,1.0,JPG,3,0.001587,"[[[119, 119, 119], [132, 132, 132], [143, 143,..."
2,4,0,48,48,1.0,JPG,3,0.001643,"[[[122, 122, 122], [99, 99, 99], [72, 72, 72],..."
3,4,0,48,48,1.0,JPG,3,0.001677,"[[[43, 43, 43], [68, 68, 68], [57, 57, 57], [7..."
4,4,0,48,48,1.0,JPG,3,0.001497,"[[[2, 2, 2], [2, 2, 2], [6, 6, 6], [5, 5, 5], ..."
...,...,...,...,...,...,...,...,...,...
695,4,5,48,48,1.0,JPG,3,0.001274,"[[[127, 127, 127], [124, 124, 124], [115, 115,..."
696,4,5,48,48,1.0,JPG,3,0.001348,"[[[217, 217, 217], [218, 218, 218], [217, 217,..."
697,4,5,48,48,1.0,JPG,3,0.001473,"[[[165, 165, 165], [193, 193, 193], [197, 197,..."
698,4,5,48,48,1.0,JPG,3,0.001566,"[[[67, 67, 67], [55, 55, 55], [58, 58, 58], [5..."


## Move data frames to SQL

In [13]:
from sqlalchemy import create_engine 
import pymysql.cursors
import getpass
import urllib.parse

**Set up connection between python and sql workbench**

In [14]:
# The MySQL password is read from the environment so it never appears in the notebook.
pw_raw = os.getenv('MSQLpass')
if pw_raw is None:
    # Without this check quote_plus(None) fails with an unrelated-looking TypeError.
    raise RuntimeError(
        "Environment variable 'MSQLpass' is not set; export the MySQL password before running this cell."
    )

# Percent-encode the password so characters such as '@', ':' or '/' cannot break the URL.
pw = urllib.parse.quote_plus(pw_raw)

connection_string = 'mysql+pymysql://root:' + pw + '@localhost:3306/'

# Engine for the local MySQL server; the target schema is chosen per to_sql call.
engine = create_engine(connection_string)

**Send tables with source and emotions ids**

In [228]:
# Write the sources lookup table, replacing any existing `data_sources` table.
# `schema` is passed by keyword: pandas deprecated positional arguments after `con` in to_sql.
sources.to_sql('data_sources', engine, schema='final_project', if_exists='replace', index=False)

4

In [16]:
# Write the emotions lookup table, replacing any existing `emotions_ids` table.
# `schema` is passed by keyword: pandas deprecated positional arguments after `con` in to_sql.
emotions_ids.to_sql('emotions_ids', engine, schema='final_project', if_exists='replace', index=False)

8

**Send data frames with images from different sources**

In [27]:
# Write the Pixabay image table (index=True also stores the DataFrame index as a column).
# `schema` is passed by keyword: pandas deprecated positional arguments after `con` in to_sql.
# NOTE(review): `pixels` holds the decoded image arrays as Python objects - confirm how the
# driver serializes them before relying on the stored values.
pixabay.to_sql('images_pixabay', engine, schema='final_project', if_exists='replace', index=True)

109

In [32]:
# Write the DuckDuckGo image table (index=True also stores the DataFrame index as a column).
# `schema` is passed by keyword: pandas deprecated positional arguments after `con` in to_sql.
# NOTE(review): `pixels` holds the decoded image arrays as Python objects - confirm how the
# driver serializes them before relying on the stored values.
ddg.to_sql('images_ddg', engine, schema='final_project', if_exists='replace', index=True)

448

In [37]:
# Write the Kaggle image table (index=True also stores the DataFrame index as a column).
# `schema` is passed by keyword: pandas deprecated positional arguments after `con` in to_sql.
# NOTE(review): `pixels` holds the decoded image arrays as Python objects - confirm how the
# driver serializes them before relying on the stored values.
kaggle.to_sql('images_kaggle', engine, schema='final_project', if_exists='replace', index=True)

1400

In [42]:
# Write the FER-2013 image table (index=True also stores the DataFrame index as a column).
# `schema` is passed by keyword: pandas deprecated positional arguments after `con` in to_sql.
# NOTE(review): `pixels` holds the decoded image arrays as Python objects - confirm how the
# driver serializes them before relying on the stored values.
FER2013.to_sql('images_fer2013', engine, schema='final_project', if_exists='replace', index=True)

1400