<a href="https://colab.research.google.com/github/MNLepage08/MNLepage08/blob/main/Code/Import_Dataset_from_GitHub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!pip install distinctipy jupyter matplotlib pandas pillow torchtnt==0.2.0 tqdm tabulate

In [None]:
%%capture
!pip install cjm_pandas_utils cjm_pil_utils cjm_psl_utils cjm_pytorch_utils cjm_torchvision_tfms

In [None]:
import datetime
from functools import partial
from glob import glob
import json
import math
import multiprocessing
import os
from pathlib import Path
import random
from typing import Any, Dict, Optional

from cjm_psl_utils.core import download_file, file_extract, get_source_code
from cjm_pil_utils.core import resize_img, get_img_files, stack_imgs
from cjm_pytorch_utils.core import pil_to_tensor, tensor_to_pil, get_torch_device, set_seed, denorm_img_tensor, move_data_to_device
from cjm_pandas_utils.core import markdown_to_pandas, convert_to_numeric, convert_to_string
from cjm_torchvision_tfms.core import ResizeMax, PadSquare, CustomRandomIoUCrop

# Import the distinctipy module
from distinctipy import distinctipy

# Import matplotlib for creating plots
import matplotlib.pyplot as plt

# Import numpy
import numpy as np

# Import the pandas package
import pandas as pd

# Configure pandas DataFrame rendering: setting each option to None removes the
# limit, so cell contents are not truncated and all rows and columns are shown.
for display_option in ('max_colwidth', 'display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Import PIL for image manipulation
from PIL import Image, ImageDraw

# Import PyTorch dependencies
import torch
from torch.amp import autocast
from torch.cuda.amp import GradScaler
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchtnt.utils import get_module_summary
import torchvision
torchvision.disable_beta_transforms_warning()
from torchvision.tv_tensors import BoundingBoxes, Mask
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
import torchvision.transforms.v2  as transforms
from torchvision.transforms.v2 import functional as TF

# Import Mask R-CNN
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Import tqdm for progress bar
from tqdm.auto import tqdm

In [None]:
# The name for the project
project_name = f"pytorch-mask-r-cnn-instance-segmentation"

# The path for the project folder
project_dir = Path(f"./{project_name}/")

# Create the project directory if it does not already exist
project_dir.mkdir(parents=True, exist_ok=True)

# Define path to store datasets
dataset_dir = Path("./Dataset/")
# Create the dataset directory if it does not exist
dataset_dir.mkdir(parents=True, exist_ok=True)

pd.Series({
    "Project Directory:": project_dir,
    "Dataset Directory:": dataset_dir
}).to_frame().style.hide(axis='columns')

0,1
Project Directory:,pytorch-mask-r-cnn-instance-segmentation
Dataset Directory:,Dataset


In [None]:
# Name of the dataset; it is also the name of the GitHub repository
dataset_name = 'GLO7030_projet'

# "<owner>/<repository>" identifier of the GitHub repository
gh_repo = f'JasmRicard/{dataset_name}'

# Folder, inside the dataset directory, that this notebook reads the dataset
# files from once the repository has been cloned (relative to the working directory)
dataset_path = Path(dataset_dir, dataset_name, 'data/MNT/')

# Display the repository and the dataset location as a table without a column header
dataset_summary = pd.Series({
    "GitHub Repository:": gh_repo,
    "Dataset Path:": dataset_path
})
dataset_summary.to_frame().style.hide(axis='columns')

0,1
GitHub Repository:,JasmRicard/GLO7030_projet
Dataset Path:,Dataset/GLO7030_projet/data/MNT


Clone the private GitHub repository

In [None]:
!wget -q https://raw.githubusercontent.com/tsunrise/colab-github/main/colab_github.py
import colab_github
colab_github.github_auth(persistent_key=True)

Mounted at /content/drive/
Looks that a private key is already created. If you have already push it to github, no action required.
 Otherwise, Please go to https://github.com/settings/ssh/new to upload the following key: 
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIA5bEhuEBA1Jc4xjH0ew1f+ODpApk2lTduhZrlI6F8dK root@6e4a0db97306

Please use SSH method to clone repo.


In [None]:
!git clone {f'git@github.com:JasmRicard/GLO7030_projet.git'} {dataset_dir/dataset_name}

Cloning into 'Dataset/GLO7030_projet'...
remote: Enumerating objects: 95, done.[K
remote: Counting objects: 100% (76/76), done.[K
remote: Compressing objects: 100% (49/49), done.[K
remote: Total 95 (delta 24), reused 75 (delta 24), pack-reused 19[K
Receiving objects: 100% (95/95), 86.25 MiB | 39.18 MiB/s, done.
Resolving deltas: 100% (27/27), done.


In [None]:
# Collect the image files of the dataset. dataset_path is relative to the
# working directory, the same base the repository was cloned into, so no
# hard-coded '/content/' prefix is needed.
img_file_paths = get_img_files(dataset_path)

# List the names of the regular files in the dataset folder. The original cell
# scanned `img_dir_path`, which is never defined in this notebook (NameError on
# a fresh kernel); dataset_path is presumably the folder that was meant.
# The context manager closes the scandir iterator once the names are collected.
with os.scandir(dataset_path) as entries:
    image_file_names = [entry.name for entry in entries if entry.is_file()]

# Preview the first few file names
pd.DataFrame({"Image File": image_file_names}).head()

Unnamed: 0,Image File
0,G15_031014_100.tif
1,G07_180619_100.tif
2,G11_290919_100.tif
3,G11_300919_100.tif
4,G11_310813_100.tif
