In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; every path configured below lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Let's declare some global variables to be used in this step.

In [None]:
import os
import random
from PIL import Image
from matplotlib.pyplot import imshow

# --- Locations on the mounted shared drive ---------------------------------
ROOT_DIR = "/content/drive/Shareddrives/Giaquinta_Pasqualetti/"
# Unzipped source dataset (one sub-folder per class).
ORIGINAL_DIR = os.path.join(ROOT_DIR, "EuroSAT_RGB")
# Root of the Train/Test copy that this notebook builds.
IMAGES_DIR = os.path.join(ROOT_DIR, "Data")
TRAIN_DIR = os.path.join(IMAGES_DIR, "Train")
TEST_DIR = os.path.join(IMAGES_DIR, "Test")

# --- Hyper-parameters --------------------------------------------------------
# NOTE(review): IMAGE_DIM, BATCH_SIZE and VALIDATION_SPLIT are not used by the
# cells of this notebook; presumably they are consumed by later steps — confirm.
IMAGE_DIM = 64
BATCH_SIZE = 16
RAN_SEED = 10024062

VALIDATION_SPLIT = 0.1
# Fraction of every class that is held out for Test: the split cell uses
# (1 - TRAIN_SPLIT) as the Train share, so 0.2 means 80% Train / 20% Test.
TEST_SPLIT = 0.2
# Legacy, misleading name kept so existing cells keep working.
TRAIN_SPLIT = TEST_SPLIT

# Seed Python's global RNG (used by random.sample / rotate_or_flip below).
random.seed(RAN_SEED)

Let's remove the two folders (if we are executing this notebook again) and unzip the dataset.

In [None]:
# ! rm -r /content/drive/Shareddrives/Giaquinta_Pasqualetti/EuroSAT_RGB
# ! rm -r /content/drive/Shareddrives/Giaquinta_Pasqualetti/Data
! unzip -q /content/drive/Shareddrives/Giaquinta_Pasqualetti/EuroSAT_RGB.zip -d /content/drive/Shareddrives/Giaquinta_Pasqualetti/

Take a look at the class composition of the dataset.

In [None]:
# Report how many entries each class folder of the original dataset contains.
for class_name in os.listdir(ORIGINAL_DIR):
    class_path = os.path.join(ORIGINAL_DIR, class_name)
    n_images = len(os.listdir(class_path))
    print(f"{class_name}: {n_images}")

Forest: 3000
River: 2500
Highway: 2500
AnnualCrop: 3000
SeaLake: 3000
HerbaceousVegetation: 3000
Industrial: 2500
Residential: 3000
PermanentCrop: 2500
Pasture: 2000


In [None]:
# Takes an image as input and produces a modified version (flipped or rotated)
def rotate_or_flip(img):
    """Return a randomly flipped or rotated copy of ``img``.

    A coin toss first picks the family (flip vs. rotation); the concrete
    operation is then drawn uniformly from that family, so all five
    operations (2 flips, 3 rotations) are reachable.

    Args:
        img: object exposing ``transpose(method)`` (a PIL image in this notebook).

    Returns:
        Whatever ``img.transpose`` returns for the selected operation.

    Note:
        Draws from Python's global ``random`` state, so results follow the
        seed set in the configuration cell.
    """
    flip = [Image.FLIP_LEFT_RIGHT, Image.FLIP_TOP_BOTTOM]
    rotate = [Image.ROTATE_90, Image.ROTATE_180, Image.ROTATE_270]

    # 1 -> flip, 0 -> rotate (same convention as before).
    operations = flip if random.randint(0, 1) else rotate
    # random.choice covers the whole list; the previous randint(0, 1) index
    # could never select ROTATE_270.
    return img.transpose(random.choice(operations))

In [None]:
# Class names are the sub-folders of the original dataset. Stray files (e.g.
# hidden OS files) are ignored, and the list is sorted because os.listdir
# returns entries in arbitrary order.
classes = sorted(
    entry
    for entry in os.listdir(ORIGINAL_DIR)
    if os.path.isdir(os.path.join(ORIGINAL_DIR, entry))
)

# Create Data/Train/<class> and Data/Test/<class>. exist_ok=True makes the cell
# idempotent and also completes a partially created tree (previously nothing
# was created at all when the Data folder already existed).
for split_dir in (TRAIN_DIR, TEST_DIR):
    for c in classes:
        os.makedirs(os.path.join(split_dir, c), exist_ok=True)

After executing that cell, we have created the following directory tree:
  - +Data
  - |
  - +-----+ Train
  - |     +---- Pasture
  - |     +---- Industrial
  - |     +---- ...
  - |
  - +-----+Test
  - |     +---- Pasture
  - |     +---- Industrial
  - |     +---- ...


Now, split each class into Train and Test using the same proportion, and copy the images into the previously created Data folder

In [None]:
import numpy as np
import shutil

# Dedicated generator seeded with RAN_SEED. The previous np.random.shuffle call
# used NumPy's global RNG, which is never seeded in this notebook (only Python's
# `random` is), so every run produced a different split.
split_rng = np.random.default_rng(RAN_SEED)

# NOTE: if Data/ already holds a split produced by an earlier, unseeded run,
# delete it first; otherwise the same image may end up in both Train and Test.
for c in sorted(classes):  # fixed class order -> fixed RNG consumption order
    fromDir = os.path.join(ORIGINAL_DIR, c)
    toDirTrain = os.path.join(TRAIN_DIR, c)
    toDirTest = os.path.join(TEST_DIR, c)
    # os.listdir order is arbitrary: sort so the shuffle always starts from the
    # same sequence.
    images = sorted(os.listdir(fromDir))

    # Shuffle and divide the images: the first (1 - TRAIN_SPLIT) share goes to
    # Train, the remainder to Test.
    split_rng.shuffle(images)
    trainFiles, testFiles = np.split(np.array(images), [int(len(images) * (1 - TRAIN_SPLIT))])

    # Make sure the destination folders exist even if the tree-creation cell
    # was skipped.
    os.makedirs(toDirTrain, exist_ok=True)
    os.makedirs(toDirTest, exist_ok=True)

    # Copy those selected as Train into the Data/Train folder
    for f in trainFiles.tolist():
        shutil.copy(os.path.join(fromDir, f), os.path.join(toDirTrain, f))

    # ...and the held-out ones into the Data/Test folder
    for f in testFiles.tolist():
        shutil.copy(os.path.join(fromDir, f), os.path.join(toDirTest, f))


Check the resulting Train/Test sizes of each class

In [None]:
# For every class, show how many files landed in the Train and Test folders.
for c in classes:
    trainDir, testDir = (os.path.join(split_root, c) for split_root in (TRAIN_DIR, TEST_DIR))
    n_train = len(os.listdir(trainDir))
    n_test = len(os.listdir(testDir))

    print(f"{c}:\t\tTrain-{n_train}\tTest-{n_test}")

Forest:		Train-2400	Test-600
River:		Train-2000	Test-500
Highway:		Train-2000	Test-500
AnnualCrop:		Train-2400	Test-600
SeaLake:		Train-2400	Test-600
HerbaceousVegetation:		Train-2400	Test-600
Industrial:		Train-2000	Test-500
Residential:		Train-2400	Test-600
PermanentCrop:		Train-2000	Test-500
Pasture:		Train-1600	Test-400


The ***Pasture*** class has only 1600 training samples, while all the other classes have either 2000 or 2400. We solve this issue with augmentation, by adding 400 new Pasture images

In [None]:

# Augment the Pasture training folder with flipped/rotated copies of randomly
# chosen originals. Augmented files are saved next to the originals with the
# "MOD" prefix.
AUGMENT_PREFIX = "MOD"
N_AUGMENTED = 400  # number of augmented Pasture images wanted in total

pastureDir = os.path.join(TRAIN_DIR, "Pasture")
# Sorted listing: os.listdir order is arbitrary, which would make the seeded
# random.sample below pick different files on different runs.
pastureList = sorted(os.listdir(pastureDir))

# Originals that already have an augmented twin (from a previous run of this cell).
alreadyAugmented = {
    name[len(AUGMENT_PREFIX):] for name in pastureList if name.startswith(AUGMENT_PREFIX)
}
# Only never-augmented originals are candidates, so a re-run neither augments
# "MOD" files again nor overshoots the target count.
candidates = [
    name
    for name in pastureList
    if not name.startswith(AUGMENT_PREFIX) and name not in alreadyAugmented
]
missing = max(0, N_AUGMENTED - len(alreadyAugmented))

toAugment = random.sample(candidates, min(missing, len(candidates)))
for i in toAugment:
    imgPath = os.path.join(pastureDir, i)
    # Context manager closes the underlying file handle once the copy is saved.
    with Image.open(imgPath) as img:
        mod = rotate_or_flip(img)
        mod.save(os.path.join(pastureDir, AUGMENT_PREFIX + i))

print(f"New Pasture count: {len(os.listdir(pastureDir))}")

New Pasture count: 2000


In [None]:
# Same per-class report as before, now including the augmented Pasture images.
for c in classes:
    trainDir = os.path.join(TRAIN_DIR, c)
    testDir = os.path.join(TEST_DIR, c)
    n_train, n_test = (len(os.listdir(folder)) for folder in (trainDir, testDir))

    print(f"{c}:\t\tTrain-{n_train}\tTest-{n_test}")

Forest:		Train-2400	Test-600
River:		Train-2000	Test-500
Highway:		Train-2000	Test-500
AnnualCrop:		Train-2400	Test-600
SeaLake:		Train-2400	Test-600
HerbaceousVegetation:		Train-2400	Test-600
Industrial:		Train-2000	Test-500
Residential:		Train-2400	Test-600
PermanentCrop:		Train-2000	Test-500
Pasture:		Train-2000	Test-400


This cell should create a zip archive, but on Colab the files then do not end up inside the Drive, so the zip has to be created and then moved by hand (as I have already done)

In [None]:
# Archive the generated Data folder into Data2.zip on the shared drive
# (-r: recurse into sub-folders, -q: quiet).
# NOTE(review): the folder is passed as an absolute path, so archive entries
# presumably carry the whole content/drive/... prefix — confirm that this is
# what the consumers of Data2.zip expect.
! zip -r -q /content/drive/Shareddrives/Giaquinta_Pasqualetti/Data2.zip /content/drive/Shareddrives/Giaquinta_Pasqualetti/Data