# Libraries

In [5]:
import os, os.path
import numpy as np
from pathlib import Path

import pandas as pd

# Functions

## Directory

In [6]:
def main_directory(place='local'):
    """Move the process into the project folder of the chosen working place.

    Args:
        place (str): Either 'local' or 'online'. Defaults to 'local'.

    Returns:
        str: The current working directory after the call. When ``place`` is
        not one of the two accepted names a message is printed and the
        working directory is left as it was.
    """
    known_places = (
        ('local', '/home/ricardino/Documents/MAIA/tercer_semestre/CAD/Projecte_1'),
        ('online', '/content/drive/MyDrive/MAIA/3rd_semester/CAD/Projecte_1'),
    )
    # First project folder whose name matches, or None for an unknown place
    target = next((root for name, root in known_places if place == name), None)

    if target is None:
        print('Not accepted working place, try again')
    else:
        os.chdir(target)

    return os.getcwd()

# Switch to the local project folder and keep its path for building image paths
main_path = main_directory(place='local')

## Retrieve paths

In [198]:
def get_paths(classif, set_name, class_name):
    """Collect every file stored under ``images/<classif>/<set_name>/<class_name>``.

    The folder is located inside the module-level ``main_path`` and walked
    recursively in a single pass, so the two returned lists always line up
    element by element.

    Args:
        classif (str): Classification problem folder, e.g. 'binary' or '3_classes'.
        set_name (str): Data split folder, e.g. 'train', 'val' or 'test'.
        class_name (str): Lesion class folder, e.g. 'nevus'.

    Returns:
        tuple[list[Path], list[str]]: The file paths, expressed relative to
        ``main_path``, and for each file the first three characters of its
        name (used as the class code).
    """
    root = Path(main_path)
    folder = root / f'images/{classif}/{set_name}/{class_name}'

    # Walk the absolute folder so the result does not depend on the current
    # working directory, then strip the root to keep the paths relative.
    files = [
        entry.relative_to(root)
        for entry in folder.glob('**/*')
        if entry.is_file()
    ]
    # Derive the class codes from the same list instead of globbing again
    classes = [entry.name[:3] for entry in files]

    return files, classes


def info_dataframe(classif, set_name, class_name):
    """Build the metadata table for one class folder of one data split.

    Args:
        classif (str): Classification problem, 'binary' or '3_classes'.
        set_name (str): Data split name, e.g. 'train' or 'val'.
        class_name (str): Class folder whose files are listed.

    Returns:
        pandas.DataFrame: One row per file, sorted by path, with the columns
        'path', 'class' (three-character code taken from the file name),
        'classif' and 'set'. For the 'binary' and '3_classes' problems a
        float 'label' column is added:

        * binary: 1 for class code 'nev', 0 for anything else;
        * 3_classes: 0 for 'bcc', 1 for 'mel', 2 for 'scc', NaN otherwise.
    """
    paths_list, classes = get_paths(classif, set_name, class_name)

    # Sort by path and renumber the rows so the index is 0..n-1
    df = (
        pd.DataFrame({'path': paths_list, 'class': classes})
        .sort_values('path')
        .reset_index(drop=True)
    )
    df['classif'] = f'{classif}'
    df['set'] = f'{set_name}'

    # Labels are assigned as whole columns, which also works for an empty
    # folder, and are kept as floats so the written metadata stays unchanged.
    if classif == 'binary':
        df['label'] = (df['class'] == 'nev').astype(float)
    elif classif == '3_classes':
        label_by_class = {'bcc': 0, 'mel': 1, 'scc': 2}
        df['label'] = df['class'].map(label_by_class).astype(float)

    return df

# Getting Metadata

In [210]:
# Classification tasks and data splits to index
classif_list = ['binary', '3_classes']
set_list = ['train', 'val']

# Gather one frame per (task, split, class) folder and concatenate once at the
# end: calling pd.concat inside the loop re-copies every accumulated row.
frames = []
for classif in classif_list:
    # The class folders depend only on the task, not on the split
    if classif == 'binary':
        class_list = ['nevus', 'others']
    else:
        class_list = ['bcc', 'mel', 'scc']
    for set_name in set_list:
        for class_name in class_list:
            currentInfo = info_dataframe(classif, set_name, class_name)
            frames.append(currentInfo)

# Rows keep the order in which the folders were visited
allInfo_df = pd.concat(frames, ignore_index=True)

# The row position is stored as the identifier of each image
allInfo_df['ID'] = allInfo_df.index
allInfo_df.to_csv('data/meta_info.csv', sep='\t', index=False)

# Creating class to manipulate metadata

## Classes

In [249]:
class path_label():
    """Paths and labels of the metadata rows of one classification task and set.

    Attributes:
        paths (list): Values of the 'path' column for the selected rows.
        labels (numpy.ndarray): Values of the 'label' column for the same rows.
    """

    def __init__(self, meta, classif, set_name) -> None:
        """Keep the rows of ``meta`` that match ``classif`` and ``set_name``.

        Args:
            meta (pandas.DataFrame): Metadata table with the columns
                'classif', 'set', 'path' and 'label'.
            classif (str): Value to match in the 'classif' column.
            set_name (str): Value to match in the 'set' column.
        """
        # A row is kept only when both the task and the set match
        wanted = (meta['classif'] == classif) & (meta['set'] == set_name)
        selected = meta.loc[wanted]
        self.paths = list(selected['path'])
        self.labels = np.array(selected['label'])

## Implementation

In [236]:
# Load the tab-separated metadata table from 'data/meta_info.csv'
meta = pd.read_csv('data/meta_info.csv', sep='\t')

In [254]:
# Situation to work with: classification task and data set
classif = 'binary'
set_name = 'train'
# Information object built from the metadata rows of that situation
info = path_label(meta, classif, set_name)

