Using this notebook to explore and examine some of the data

In [1]:
import os
os.chdir('..')

In [42]:
from skimage import data
import numpy as np
import matplotlib.pyplot as plt
from skimage import io

import warnings
warnings.filterwarnings('ignore')

filepath = './data/ai4mars-dataset-merged-0.1/msl/images/edr/NLA_397681398EDR_F0020000AUT_04096M1.JPG'

image = io.imread(filepath)
plt.imshow(image, cmap='gray')

In [None]:
image.shape

In [None]:
img = image[:, :, :3].copy()

In [None]:
from skimage.color import gray2rgb

i = gray2rgb(filepath)
plt.imshow(i)

Works cited for exploration

https://neptune.ai/blog/image-segmentation
https://learnopencv.com/pytorch-for-beginners-semantic-segmentation-using-torchvision/

Note: This dataset is different than the LabelMars dataset (labelmars.net) because they use ~20 classes which are really only known to geologists or subject matter experts in the field of planetary science. 

Our dataset simplifies the labels for terrain navigation purposes. In other words, we use a small number of labels that will be used for navigation only (sand, soil, bedrock, and big rocks). No need for the scientific jargon for navigation purposes.

In addition, the number of labels in the AI4Mars dataset is two orders of magnitude larger than the LabelMars dataset. 

Need to move mars data to one location in a cleaned up directory

In [None]:
import os
os.chdir('..')

In [None]:
from pathlib import Path

image_path = Path(r"C:\Users\Shoon\repos\DGMDE-17-Final-Project\data\ai4mars-dataset-merged-0.1")

In [None]:
print(image_path)

In [None]:
image_name = "NLB_615003887EDR_F0761384NCAM00223M1.png"
mask_path = os.path.join(image_path, "msl/labels/train", image_name)

print(mask_path)

In [None]:
from PIL import Image
img = Image.open(mask_path)
img

In [None]:
import numpy as np

In [None]:
np_img = np.array(img)

In [None]:
np_img.shape

Programmatically reading mars dataset
* https://www.kaggle.com/code/yash92328/ai4mars-starter-notebook
* https://www.kaggle.com/code/leonardoolivi/esame-real-mars

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from tqdm import tqdm

In [None]:
os.chdir("..")

In [None]:
images = "data/ai4mars-dataset-merged-0.1/msl/images"
tr_lab = "data/ai4mars-dataset-merged-0.1/msl/labels/train"
edr = images + "/edr"
mxy = images + "/mxy" # not required
rng = images + "/rng-30m" # not required

In [None]:
edr_files = os.listdir(edr)
trlab_files = os.listdir(tr_lab)

X = []
y = []
c = 0

# preparing X and y
for lab_name in trlab_files:
    img_name = lab_name[:-4] + ".JPG"
    
    if img_name in edr_files:
        
        img_path = os.path.join(edr, img_name)
        img_arr = cv2.imread(img_path)
        img_arr = cv2.resize(img_arr, dsize = (224, 224))
        
        lab_path = os.path.join(tr_lab, lab_name)
        lab_arr = cv2.imread(lab_path, 0)
        lab_arr = cv2.resize(lab_arr, (224, 224), interpolation = cv2.INTER_NEAREST)
        
        X.append(img_arr)
        y.append(lab_arr)
        
    c += 1
    if c >= 1000:
        break
        
X = np.asarray(X, dtype = np.float32) / 255.0
y = np.array(y, dtype = np.uint8)

# 0 - soil
# 1 - bedrock
# 2 - sand
# 3 - big rock
# 255 -> 4 - NULL (no label)


# keeping integer values in labels will help us in segmentation task (UNet)
y[y==255] = 4

print(X.shape, y.shape)

In [None]:
plt.imshow(X[255])

In [None]:
X[255].shape

In [None]:
print(np.unique(y[255]))

plt.figure(figsize = (10, 10))

plt.imshow(X[255])
plt.imshow(y[255], alpha = 0.1)

plt.show()

### Examining the S5Mars dataset

In [2]:
import json

mars_data_path = "./data/s5mars_data/S5Mars_data/"

In [7]:

test_obj = open(os.path.join(mars_data_path, "split/test.json"))
test_split = json.load(test_obj)
test_obj.close()

train_obj = open(os.path.join(mars_data_path, "split/train.json"))
train_split = json.load(train_obj)
train_obj.close()

val_obj = open(os.path.join(mars_data_path, "split/val.json"))
val_split = json.load(val_obj)
val_obj.close()

In [9]:
train_split[:5]

['hard/0725MR0030950010402846E01_DXXX',
 'hard/0744MR0031940310403356E01_DXXX',
 'hard/1292ML0060930000405196E01_DXXX',
 'easy/0303MR0012610240203842E01_DXXX',
 'easy/0318MR0013020710300678E01_DXXX']

In [32]:
training_dest   = os.path.join("./data/s5mars_data/", 'train')
testing_dest    = os.path.join("./data/s5mars_data/", 'test')
validation_dest = os.path.join("./data/s5mars_data/", 'val')

if not os.path.exists(training_dest):
    os.makedirs(training_dest)
if not os.path.exists(testing_dest):
    os.makedirs(testing_dest)
if not os.path.exists(validation_dest):
    os.makedirs(validation_dest)

In [50]:
import shutil

for img in train_split:
    source = os.path.join(mars_data_path, 'images', img) + '.jpg'
    dest = os.path.join(training_dest, 'Image')
    shutil.copy(source, dest)
# for img in test_split:
#     source = os.path.join(mars_data_path, 'labels', img) + '.png'
#     dest = os.path.join(testing_dest, 'masks')
#     shutil.copy(source, dest)
# for img in val_split:
#     source = os.path.join(mars_data_path, 'labels', img) + '.png'
#     dest = os.path.join(validation_dest, 'masks')
#     shutil.copy(source, dest)

### Making all the images the same size
* Why this wasn't done in the first place is beyond me
* Starting with 500x500

In [59]:
import shutil
import skimage
import numpy as np

# Location of data
dest = os.path.join(validation_dest, 'Mask')

for img_name in os.listdir(dest):
    # print(img)
#     source = os.path.join(mars_data_path, 'labels', img) + '.png'
#     dest = os.path.join(training_dest, 'masks', img)

#     # shutil.copy(source, dest)

    img = skimage.io.imread(os.path.join(dest, img_name))
    img = skimage.transform.resize(np.array(img), (500, 500))#.astype(np.uint8)
    skimage.io.imsave(os.path.join(dest, 'resized', img_name), img)

    




ValueError: ImageIO does not generally support reading folders. Limited support may be available via specific plugins. Specify the plugin explicitly using the `plugin` kwarg, e.g. `plugin='DICOM'`

Copy images to a correct training folder

In [None]:
https://www.kaggle.com/code/leonardoolivi/esame-real-mars/notebook
https://arxiv.org/pdf/2202.00791.pdf
https://towardsdatascience.com/transfer-learning-for-segmentation-using-deeplabv3-in-pytorch-f770863d6a42
https://pytorch.org/hub/pytorch_vision_deeplabv3_resnet101/
https://jhang2020.github.io/S5Mars.github.io/
https://arxiv.org/pdf/2207.01200.pdf

https://www.kaggle.com/code/dhananjay3/image-segmentation-from-scratch-in-pytorch
https://www.kaggle.com/code/ligtfeather/semantic-segmentation-is-easy-with-pytorch#Evaluation