In [1]:
import os
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [2]:
def shift_landmarks(df, x, y, idx):
    """Return a copy of ``df`` in which one row's landmarks are translated.

    Only the row at positional index ``idx`` is changed; every other row and
    the input frame itself are left as they were.

    Landmark coordinates are stored as alternating x/y columns starting at
    positional column 1 (x in the odd positions, y in the even ones). When
    the row's value in positional column 88 is null, columns 1-86 (43 points)
    are shifted; otherwise columns 1-146 (73 points) are shifted.

    Parameters
    ----------
    df : pandas.DataFrame
        Landmark table.
    x, y : number
        Amounts added to every x coordinate and every y coordinate.
    idx : int
        Positional index of the row to shift.

    Returns
    -------
    pandas.DataFrame
        The shifted copy.
    """
    shifted = df.copy()
    print("Shifting landmarks by x amount: %d, y amount: %d." % (x,y))

    # Last landmark column depends on how many points this row carries.
    last_col = 86 if pd.isnull(df.iloc[idx, 88]) else 146
    xy_columns = list(zip(range(1, last_col, 2), range(2, last_col + 1, 2)))

    # Cell-by-cell update keeps each column's own dtype handling.
    for col_x, col_y in tqdm(xy_columns):
        shifted.iat[idx, col_x] += x
        shifted.iat[idx, col_y] += y

    print("Shift complete.")
    return shifted

def remove_bad_entries(df, img_dir):
    """Return a copy of ``df`` without rows that fail either of two checks.

    1. Image check: a row is removed when the file
       ``img_dir + str(ID) + ".tiff"`` does not exist, where ``ID`` is the
       value in the first column. ``img_dir`` is concatenated as-is, so it
       must already end with a path separator.
    2. Landmark check (run on the rows that survived check 1): a row is
       removed when any of its landmark columns is NaN. Positional columns
       1-86 are inspected when column 88 is null, columns 1-146 otherwise.

    Rows are selected by position with boolean masks, so the surviving rows
    keep their original index labels and repeated labels cannot cause extra
    rows to be removed. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Landmark table with at least 89 columns.
    img_dir : str
        Directory prefix of the ``.tiff`` images.

    Returns
    -------
    pandas.DataFrame
        The cleaned copy.

    Notes
    -----
    Landmark values are converted with ``astype(float)``; a value that cannot
    be converted makes that conversion raise.
    """
    print("Original shape of dataframe:", df.shape)

    # Pass 1: keep only rows whose image file is present on disk.
    has_image = []
    for name in tqdm(df.iloc[:, 0]):
        img_path = img_dir + str(name) + ".tiff"
        found = os.path.exists(img_path)
        if not found:
            print("File does not exist:", img_path)
        has_image.append(found)
    cleansed_df = df[np.array(has_image, dtype=bool)].copy()

    # Pass 2: keep only rows whose expected landmark columns are all filled.
    is_complete = []
    for pos in tqdm(range(cleansed_df.shape[0])):
        # Column 88 empty -> 43-point row (cols 1-86), else 73-point row (cols 1-146).
        stop = 87 if pd.isnull(cleansed_df.iloc[pos, 88]) else 147
        coords = np.asarray(cleansed_df.iloc[pos, 1:stop]).astype(float)
        complete = not np.isnan(coords).any()
        if not complete:
            # The reported number is the row's position after pass 1.
            print("Row %d has incorrect amount of landmarks"%pos)
        is_complete.append(complete)
    cleansed_df = cleansed_df[np.array(is_complete, dtype=bool)].copy()

    print("Removed bad entries...")
    print("New shape of dataframe:", cleansed_df.shape)
    print("\n")
    return cleansed_df

def cleanseANDconcat_multi_CSV(csv_list, root_dir):
    """Load every CSV in ``csv_list``, clean it, and stack the results.

    Each file is read with ``pd.read_csv`` and passed, together with
    ``root_dir``, through ``remove_bad_entries``. The cleaned frames are then
    concatenated in list order under a fresh 0..n-1 index.

    Parameters
    ----------
    csv_list : list of str
        Paths of the CSV files to merge.
    root_dir : str
        Image directory forwarded to ``remove_bad_entries``.

    Returns
    -------
    pandas.DataFrame
        The combined, cleaned table.
    """
    print("Cleansing and Concatenating %d dataframes... \n" % len(csv_list))

    def _load_and_clean(csv_path):
        # Read first, announce second, clean third.
        frame = pd.read_csv(csv_path)
        print("Cleansing: ",csv_path)
        return remove_bad_entries(frame, root_dir)

    cleaned_frames = [_load_and_clean(csv_path) for csv_path in csv_list]
    merged = pd.concat(cleaned_frames, ignore_index=True)

    print("Process complete.")
    return merged

def split_frontANDprofile(df):
    """Split ``df`` into a profile-view frame and a front-view frame.

    A row is treated as a profile view when its value in positional column 88
    is null, and as a front view otherwise. Rows are picked by position with
    a boolean mask, so index labels (even repeated ones) are carried over
    untouched and ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Landmark table with at least 89 columns.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(profile_df, front_df)``, each an independent copy.
    """
    # One vectorised null test replaces the per-row scalar lookups.
    is_profile = df.iloc[:, 88].isna().to_numpy()

    profile_df = df[is_profile].copy()
    front_df = df[~is_profile].copy()

    print("Split dataframe into profile views dataframe and front views dataframe.")
    return profile_df, front_df

def create_frontVSprofile_csv(df):
    """Build a two-column file/class table from a landmark table.

    For every row of ``df``:

    * ``File`` is the first-column value converted to ``str``, with its first
      ``'R'`` replaced by ``'N'`` and ``".jpg"`` appended.
    * ``Class`` is 1 (profile) when positional column 88 is null and
      0 (front) otherwise.

    Values are read by position, so the result is correct whatever index
    labels ``df`` carries (for example after rows have been removed).

    Parameters
    ----------
    df : pandas.DataFrame
        Landmark table with at least 89 columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``File`` and ``Class`` with a fresh 0..n-1 index, one row per
        input row.
    """
    front = 0
    profile = 1

    # Iterating a Series yields its values in positional order; no label lookups.
    files = [str(name).replace('R', 'N', 1) + ".jpg" for name in df.iloc[:, 0]]
    classes = [profile if missing else front for missing in df.iloc[:, 88].isna()]

    return pd.DataFrame({"File": files, "Class": classes})

In [3]:
# Build the paths with os.path.join so the notebook is not tied to Windows "\" separators.
csv_file = os.path.join(os.curdir, "S8.csv")
# The empty last component leaves a trailing separator, which remove_bad_entries
# relies on: it concatenates root_dir + ID + ".tiff" directly.
root_dir = os.path.join(os.curdir, "Master", "")

csv = pd.read_csv(csv_file)
print(csv.shape)
csv.head()

(521, 152)


Unnamed: 0,ID,x0,y0,x1,y1,x2,y2,x3,y3,x4,...,y70,x71,y71,x72,y72,Distance,RH,Airflow,env-temp,Sensation
0,R811010,240,193,234,206,228,217,220,227,208,...,,,,,,1.0,53.794444,0.508,21.640111,-1
1,R811011,130,176,127,187,125,199,123,212,122,...,103.0,202.0,108.0,210.0,125.0,1.0,53.794444,0.508,21.640111,-1
2,R811012,136,190,136,201,136,213,136,226,140,...,100.0,218.0,106.0,230.0,120.0,1.0,53.794444,0.508,21.640111,-1
3,R811013,142,195,145,206,149,216,155,227,161,...,102.0,219.0,104.0,234.0,114.0,1.0,53.794444,0.508,21.640111,-1
4,R811014,145,193,150,204,155,213,161,222,168,...,100.0,226.0,107.0,242.0,119.0,1.0,53.794444,0.508,21.640111,-1


In [4]:
# Clean the single-session frame: rows without an image file or with NaN landmarks are removed.
new_csv = remove_bad_entries(df=csv, img_dir=root_dir)

Original shape of dataframe: (521, 152)


  0%|          | 0/521 [00:00<?, ?it/s]

File does not exist: .\Master\R81141.tiff
File does not exist: .\Master\R82281.tiff
File does not exist: .\Master\R84281.tiff


  0%|          | 0/518 [00:00<?, ?it/s]

Row 194 has incorrect amount of landmarks
Removed bad entries...
New shape of dataframe: (517, 152)




In [5]:
# Check the cleaned frame: its shape, then its first rows.
print(new_csv.shape)
new_csv.head()

(517, 152)


Unnamed: 0,ID,x0,y0,x1,y1,x2,y2,x3,y3,x4,...,y70,x71,y71,x72,y72,Distance,RH,Airflow,env-temp,Sensation
0,R811010,240,193,234,206,228,217,220,227,208,...,,,,,,1.0,53.794444,0.508,21.640111,-1
1,R811011,130,176,127,187,125,199,123,212,122,...,103.0,202.0,108.0,210.0,125.0,1.0,53.794444,0.508,21.640111,-1
2,R811012,136,190,136,201,136,213,136,226,140,...,100.0,218.0,106.0,230.0,120.0,1.0,53.794444,0.508,21.640111,-1
3,R811013,142,195,145,206,149,216,155,227,161,...,102.0,219.0,104.0,234.0,114.0,1.0,53.794444,0.508,21.640111,-1
4,R811014,145,193,150,204,155,213,161,222,168,...,100.0,226.0,107.0,242.0,119.0,1.0,53.794444,0.508,21.640111,-1


In [6]:
# Move the landmarks of the first row (position 0) by -15 on y, leaving x unchanged.
shift_csv = shift_landmarks(new_csv, x=0, y=-15, idx=0)
print(shift_csv.shape)
shift_csv.head()

Shifting landmarks by x amount: 0, y amount: -15.


  0%|          | 0/43 [00:00<?, ?it/s]

Shift complete.
(517, 152)


Unnamed: 0,ID,x0,y0,x1,y1,x2,y2,x3,y3,x4,...,y70,x71,y71,x72,y72,Distance,RH,Airflow,env-temp,Sensation
0,R811010,240,178,234,191,228,202,220,212,208,...,,,,,,1.0,53.794444,0.508,21.640111,-1
1,R811011,130,176,127,187,125,199,123,212,122,...,103.0,202.0,108.0,210.0,125.0,1.0,53.794444,0.508,21.640111,-1
2,R811012,136,190,136,201,136,213,136,226,140,...,100.0,218.0,106.0,230.0,120.0,1.0,53.794444,0.508,21.640111,-1
3,R811013,142,195,145,206,149,216,155,227,161,...,102.0,219.0,104.0,234.0,114.0,1.0,53.794444,0.508,21.640111,-1
4,R811014,145,193,150,204,155,213,161,222,168,...,100.0,226.0,107.0,242.0,119.0,1.0,53.794444,0.508,21.640111,-1


In [7]:
# CSV files to clean and merge into one master table, in this order.
csv_list = [stem + ".csv" for stem in ("S3-b", "S5", "S6", "S7", "S8", "S10")]
master_csv = cleanseANDconcat_multi_CSV(csv_list=csv_list, root_dir=root_dir)
print(master_csv.shape)
master_csv

Cleansing and Concatenating 6 dataframes... 

Cleansing:  S3-b.csv
Original shape of dataframe: (512, 152)


  0%|          | 0/512 [00:00<?, ?it/s]

  0%|          | 0/512 [00:00<?, ?it/s]

Row 280 has incorrect amount of landmarks
Row 472 has incorrect amount of landmarks
Removed bad entries...
New shape of dataframe: (510, 152)


Cleansing:  S5.csv
Original shape of dataframe: (651, 152)


  0%|          | 0/651 [00:00<?, ?it/s]

File does not exist: .\Master\R54101.tiff
File does not exist: .\Master\R54141.tiff
File does not exist: .\Master\R54281.tiff
File does not exist: .\Master\R55101.tiff
File does not exist: .\Master\R55141.tiff
File does not exist: .\Master\R55181.tiff
File does not exist: .\Master\R55221.tiff
File does not exist: .\Master\R55281.tiff


  0%|          | 0/643 [00:00<?, ?it/s]

Row 88 has incorrect amount of landmarks
Row 177 has incorrect amount of landmarks
Row 506 has incorrect amount of landmarks
Row 512 has incorrect amount of landmarks
Removed bad entries...
New shape of dataframe: (639, 152)


Cleansing:  S6.csv
Original shape of dataframe: (521, 152)


  0%|          | 0/521 [00:00<?, ?it/s]

File does not exist: .\Master\R63281.tiff


  0%|          | 0/520 [00:00<?, ?it/s]

Row 304 has incorrect amount of landmarks
Row 453 has incorrect amount of landmarks
Removed bad entries...
New shape of dataframe: (518, 152)


Cleansing:  S7.csv
Original shape of dataframe: (525, 152)


  0%|          | 0/525 [00:00<?, ?it/s]

File does not exist: .\Master\R71141.tiff
File does not exist: .\Master\R72181.tiff
File does not exist: .\Master\R73221.tiff


  0%|          | 0/522 [00:00<?, ?it/s]

Row 278 has incorrect amount of landmarks
Removed bad entries...
New shape of dataframe: (521, 152)


Cleansing:  S8.csv
Original shape of dataframe: (521, 152)


  0%|          | 0/521 [00:00<?, ?it/s]

File does not exist: .\Master\R81141.tiff
File does not exist: .\Master\R82281.tiff
File does not exist: .\Master\R84281.tiff


  0%|          | 0/518 [00:00<?, ?it/s]

Row 194 has incorrect amount of landmarks
Removed bad entries...
New shape of dataframe: (517, 152)


Cleansing:  S10.csv
Original shape of dataframe: (512, 152)


  0%|          | 0/512 [00:00<?, ?it/s]

File does not exist: .\Master\nan.tiff
File does not exist: .\Master\nan.tiff


  0%|          | 0/510 [00:00<?, ?it/s]

Row 103 has incorrect amount of landmarks
Row 142 has incorrect amount of landmarks
Row 156 has incorrect amount of landmarks
Row 229 has incorrect amount of landmarks
Row 238 has incorrect amount of landmarks
Row 428 has incorrect amount of landmarks
Removed bad entries...
New shape of dataframe: (504, 152)


Process complete.
(3209, 152)


Unnamed: 0,ID,x0,y0,x1,y1,x2,y2,x3,y3,x4,...,y70,x71,y71,x72,y72,Distance,RH,Airflow,env-temp,Sensation
0,R313410,92.0,69.0,93.0,73.0,95.0,77.0,96.0,80.0,98.0,...,,,,,,,,,,
1,R313411,85.0,66.0,86.0,72.0,88.0,77.0,90.0,82.0,93.0,...,,,,,,,,,,
2,R313412,79.0,70.0,80.0,74.0,81.0,78.0,82.0,81.0,84.0,...,41.0,108.0,43.0,113.0,47.0,,,,,
3,R313413,72.0,71.0,73.0,75.0,74.0,79.0,75.0,83.0,76.0,...,42.0,101.0,43.0,104.0,47.0,,,,,
4,R313414,114.0,68.0,113.0,72.0,112.0,77.0,111.0,82.0,107.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3204,R104285,73.0,78.0,74.0,83.0,75.0,89.0,75.0,94.0,77.0,...,54.0,111.0,54.0,115.0,62.0,2.8,54.99444444,0.0,25.5665,1.0
3205,R104286,73.0,78.0,74.0,84.0,75.0,90.0,78.0,96.0,80.0,...,54.0,112.0,55.0,114.0,63.0,2.8,54.99444444,0.0,25.5665,1.0
3206,R104287,75.0,76.0,76.0,82.0,78.0,89.0,80.0,95.0,84.0,...,53.0,113.0,53.0,115.0,60.0,2.8,54.99444444,0.0,25.5665,1.0
3207,R104288,84.0,74.0,85.0,80.0,86.0,86.0,89.0,92.0,91.0,...,,,,,,2.8,54.99444444,0.0,25.5665,1.0


In [8]:
# Separate the master table into profile-view rows and front-view rows.
pf_df, fr_df = split_frontANDprofile(df=master_csv)
print(pf_df.shape, fr_df.shape, sep="\n")
pf_df

  0%|          | 0/3209 [00:00<?, ?it/s]

Split dataframe into profile views dataframe and front views dataframe.
(1350, 152)
(1859, 152)


Unnamed: 0,ID,x0,y0,x1,y1,x2,y2,x3,y3,x4,...,y70,x71,y71,x72,y72,Distance,RH,Airflow,env-temp,Sensation
0,R313410,92.0,69.0,93.0,73.0,95.0,77.0,96.0,80.0,98.0,...,,,,,,,,,,
1,R313411,85.0,66.0,86.0,72.0,88.0,77.0,90.0,82.0,93.0,...,,,,,,,,,,
4,R313414,114.0,68.0,113.0,72.0,112.0,77.0,111.0,82.0,107.0,...,,,,,,,,,,
5,R313415,113.0,68.0,112.0,72.0,111.0,77.0,110.0,81.0,108.0,...,,,,,,,,,,
6,R313416,89.0,77.0,91.0,81.0,93.0,85.0,95.0,90.0,94.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,R1042821,87.0,83.0,86.0,89.0,86.0,98.0,86.0,105.0,88.0,...,,,,,,2.8,54.87222222,0.0,25.60966667,1.0
3196,R1042822,96.0,84.0,96.0,90.0,95.0,96.0,95.0,101.0,95.0,...,,,,,,2.8,54.87222222,0.0,25.60966667,1.0
3197,R1042823,106.0,83.0,104.0,88.0,102.0,93.0,102.0,99.0,102.0,...,,,,,,2.8,54.87222222,0.0,25.60966667,1.0
3207,R104288,84.0,74.0,85.0,80.0,86.0,86.0,89.0,92.0,91.0,...,,,,,,2.8,54.99444444,0.0,25.5665,1.0


In [9]:
# Commented-out alternative: wrapping the print in pd.option_context with
# display.max_rows / display.max_columns set to None prints the frame without
# pandas' row/column display limits.
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     print(fr_df)

# Plain-text dump of the front-view frame using the current display settings.
print(fr_df)

           ID    x0    y0    x1    y1    x2    y2    x3     y3    x4  ...  \
2     R313412  79.0  70.0  80.0  74.0  81.0  78.0  82.0   81.0  84.0  ...   
3     R313413  72.0  71.0  73.0  75.0  74.0  79.0  75.0   83.0  76.0  ...   
8     R313418  66.0  92.0  67.0  96.0  69.0  99.0  71.0  102.0  74.0  ...   
9     R313419  75.0  86.0  76.0  91.0  77.0  95.0  79.0   98.0  82.0  ...   
14    R313423  53.0  90.0  56.0  93.0  59.0  95.0  62.0   97.0  66.0  ...   
...       ...   ...   ...   ...   ...   ...   ...   ...    ...   ...  ...   
3202  R104283  70.0  80.0  69.0  86.0  69.0  91.0  70.0   96.0  71.0  ...   
3203  R104284  72.0  76.0  72.0  81.0  72.0  86.0  73.0   91.0  74.0  ...   
3204  R104285  73.0  78.0  74.0  83.0  75.0  89.0  75.0   94.0  77.0  ...   
3205  R104286  73.0  78.0  74.0  84.0  75.0  90.0  78.0   96.0  80.0  ...   
3206  R104287  75.0  76.0  76.0  82.0  78.0  89.0  80.0   95.0  84.0  ...   

       y70    x71   y71    x72   y72  Distance           RH  Airflow  \
2  

In [10]:
# Write both view tables to disk with a header row and without the index column.
pf_df.to_csv(path_or_buf="Master_profile.csv", index=False, header=True)
fr_df.to_csv(path_or_buf="Master_front.csv", index=False, header=True)



In [11]:
# Derive the file-name / class table (0 = front, 1 = profile) from the master table.
pvf = create_frontVSprofile_csv(df=master_csv)
print(pvf.shape)
pvf

  0%|          | 0/3209 [00:00<?, ?it/s]

(3209, 2)


Unnamed: 0,File,Class
0,N313410.jpg,1
1,N313411.jpg,1
2,N313412.jpg,0
3,N313413.jpg,0
4,N313414.jpg,1
...,...,...
3204,N104285.jpg,0
3205,N104286.jpg,0
3206,N104287.jpg,0
3207,N104288.jpg,1


In [12]:
# pvf.to_csv("frontVSprofile.csv", header=True, index =False)
