In [1]:
import pandas as pd
from pathlib import Path
import os

In [7]:
def get_repo_path(path : 'str | os.PathLike', repo_name : 'str' = 'NOV24-BDS---Covid19-groupe-1') -> Path:
    """
    Return the path of the repository folder named 'repo_name' found inside 'path'.

    The path is truncated right after the first component equal to repo_name, so this
    only works when 'path' points to the repository folder itself or to one of its
    child folders.
    args:
        path: str or path-like - path containing a component named repo_name
        repo_name: str - repository's folder name (default : 'NOV24-BDS---Covid19-groupe-1')
    returns:
        repo_path: pathlib.Path - 'path' cut after the repo_name component (absolute if 'path' is absolute)
    raises:
        ValueError - if no component of 'path' is equal to repo_name
    """
    # Coerce to Path so plain strings (as documented) work as well as Path objects;
    # the previous code called .parts directly and failed with AttributeError on a str.
    parts = Path(path).parts # split path into the tuple of its components
    for i, part in enumerate(parts):
        if part == repo_name:
            return Path(*parts[:i + 1]) # rebuild the path up to and including repo_name
    raise ValueError(f'There is no repo directory {repo_name} (set by repo_name) in the path') # no component matched repo_name

In [6]:
def get_absolute_path(path : str, repo_name : 'str' = 'NOV24-BDS---Covid19-groupe-1') -> str:
    r"""
    Resolve a repository-relative path against the repository root on the local machine.

    The root is found by searching for the folder named repo_name in the location of the
    running program (the script's folder, or the current working directory in a notebook).
    args:
        path : str - path relative to the repository folder (ex: 'data\raw\COVID\images\COVID-1.png')
        repo_name : str - repository's folder name (default : 'NOV24-BDS---Covid19-groupe-1')
    returns:
        absolute_path - repository root joined with path (a pathlib path object, built with the / operator)
    raises:
        ValueError - raised by get_repo_path when repo_name is not part of the starting folder's path
    """
    try:
        # .py files define __file__: start from the folder containing the script
        start_dir = Path(__file__).resolve().parent
    except NameError:
        # .ipynb kernels do not define __file__: start from the current working directory
        import os
        start_dir = Path(os.getcwd()).resolve()
    # locate the repository root from the starting folder, then append the relative path
    return get_repo_path(path = start_dir, repo_name = repo_name) / path

In [2]:
# Load the metadata file.
# The path is built from segments with pathlib so the separator is correct on every OS
# (the literal "..\..\data\..." form only resolves on Windows).
df = pd.read_csv(Path("..", "..", "data", "processed", "metadatas.csv"))

In [3]:
from notebooks.AntoineB.Libraries.add_url import add_url

# Add the image and mask links to the metadata.
# The return values are discarded, so add_url presumably adds the columns to df in place
# (IMG_URL / MASK_URL show up in the df.tail() output further down) - TODO confirm in add_url.
add_url(df)
add_url(df, file_type='mask')

In [5]:
# Display the last rows of the metadata frame as a quick visual check
df.tail()

Unnamed: 0,FILE NAME,FORMAT,SIZE,URL,LABEL,IMG_URL,MASK_URL
21160,Lung_Opacity-6008,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,LUNG OPACITY,data\raw\LUNG OPACITY\images\Lung_Opacity-6008...,data\raw\LUNG OPACITY\masks\Lung_Opacity-6008.png
21161,Lung_Opacity-6009,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,LUNG OPACITY,data\raw\LUNG OPACITY\images\Lung_Opacity-6009...,data\raw\LUNG OPACITY\masks\Lung_Opacity-6009.png
21162,Lung_Opacity-6010,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,LUNG OPACITY,data\raw\LUNG OPACITY\images\Lung_Opacity-6010...,data\raw\LUNG OPACITY\masks\Lung_Opacity-6010.png
21163,Lung_Opacity-6011,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,LUNG OPACITY,data\raw\LUNG OPACITY\images\Lung_Opacity-6011...,data\raw\LUNG OPACITY\masks\Lung_Opacity-6011.png
21164,Lung_Opacity-6012,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,LUNG OPACITY,data\raw\LUNG OPACITY\images\Lung_Opacity-6012...,data\raw\LUNG OPACITY\masks\Lung_Opacity-6012.png


In [38]:
# Save the updated metadata.
# The path is built from segments with pathlib so it resolves on every OS, not only Windows.
# NOTE: this overwrites the same metadatas.csv that the notebook reads at the start,
# so a later run starts from the already-updated file.
df.to_csv(Path("..", "..", "data", "processed", "metadatas.csv"), sep=',', encoding='utf-8', index=False, header=True)