# Bibliotecas

In [17]:
from tqdm.notebook import tqdm

import pandas as pd
import os

from sklearn.model_selection import train_test_split

# 1. Modelos de ruídos

In [2]:
noises_available = ['gaussian', 'uniform', 'exponential', 'poisson', 'salt_pepper']

# 2. Criando dataframe base


In [11]:
PATH = "/home/lucas/datasets"
arquivos = []
for dir_ in [x for x in os.listdir(PATH) if "crop_face" in x]:
    root_ = f"{PATH}/{dir_}"
    lista = [x for x in os.listdir(root_) if (x.endswith("png") or x.endswith("jpeg") or x.endswith("jpg"))]
    for file_ in tqdm(lista):
        arquivos.append([f"{root_}/{file_}",file_,dir_.split("_")[0],"normal"])

  0%|          | 0/600 [00:00<?, ?it/s]

  0%|          | 0/1035 [00:00<?, ?it/s]

  0%|          | 0/337 [00:00<?, ?it/s]

  0%|          | 0/2330 [00:00<?, ?it/s]

In [14]:
df_base = pd.DataFrame(arquivos,columns=["full_path","image_name","dataset","noise"])
df_base

Unnamed: 0,full_path,image_name,dataset,noise
0,/home/lucas/datasets/300W_crop_face/outdoor_08...,outdoor_085.png,300W,normal
1,/home/lucas/datasets/300W_crop_face/indoor_297...,indoor_297.png,300W,normal
2,/home/lucas/datasets/300W_crop_face/outdoor_22...,outdoor_227.png,300W,normal
3,/home/lucas/datasets/300W_crop_face/outdoor_03...,outdoor_031.png,300W,normal
4,/home/lucas/datasets/300W_crop_face/outdoor_02...,outdoor_023.png,300W,normal
...,...,...,...,...
4297,/home/lucas/datasets/helen_crop_face/253368567...,2533685677_2.png,helen,normal
4298,/home/lucas/datasets/helen_crop_face/220369372...,2203693721_2.png,helen,normal
4299,/home/lucas/datasets/helen_crop_face/230071339...,2300713390_1.png,helen,normal
4300,/home/lucas/datasets/helen_crop_face/314447301...,3144473012_2.png,helen,normal


# 3. Aplicando ruído - Somente Salt Pepper

In [16]:
df_salt_pepper = df_base.copy()
df_salt_pepper["noise"] = "salt_pepper"

df_v1 = pd.concat([df_salt_pepper,df_base])
df_v1.reset_index(inplace=True,drop=True)
df_v1

Unnamed: 0,full_path,image_name,dataset,noise
0,/home/lucas/datasets/300W_crop_face/outdoor_08...,outdoor_085.png,300W,salt_pepper
1,/home/lucas/datasets/300W_crop_face/indoor_297...,indoor_297.png,300W,salt_pepper
2,/home/lucas/datasets/300W_crop_face/outdoor_22...,outdoor_227.png,300W,salt_pepper
3,/home/lucas/datasets/300W_crop_face/outdoor_03...,outdoor_031.png,300W,salt_pepper
4,/home/lucas/datasets/300W_crop_face/outdoor_02...,outdoor_023.png,300W,salt_pepper
...,...,...,...,...
8599,/home/lucas/datasets/helen_crop_face/253368567...,2533685677_2.png,helen,normal
8600,/home/lucas/datasets/helen_crop_face/220369372...,2203693721_2.png,helen,normal
8601,/home/lucas/datasets/helen_crop_face/230071339...,2300713390_1.png,helen,normal
8602,/home/lucas/datasets/helen_crop_face/314447301...,3144473012_2.png,helen,normal


## 3.1 Separando em treino, teste e validação

In [50]:
train_list = []
test_list = []
val_list = []

for dataset in tqdm(df_v1.dataset.unique()):
    aux = df_v1[df_v1['dataset'] == dataset]
    train, test = train_test_split(aux,test_size=0.30,stratify=aux["noise"])
    test, val = train_test_split(test,test_size=0.50,stratify=test["noise"])
    
    train_list.append(train.copy())
    test_list.append(test.copy())
    val_list.append(val.copy())

train = pd.concat(train_list)
train["split"] = "train"
test = pd.concat(test_list)
test["split"] = "test"
val = pd.concat(val_list)
val["split"] = "val"

df_v1 = pd.concat([train,val,test])

  0%|          | 0/4 [00:00<?, ?it/s]

In [33]:
train.shape

(6022, 4)

In [34]:
test.shape

(1290, 4)

In [35]:
val.shape

(1292, 4)

## 3.2 Salvando dataframe

In [52]:
df_v1.to_csv("/home/lucas/datasets/dataframe_v1.cv",index=False)

In [53]:
df_v1.shape

(8604, 5)