In [2]:
import torch
import torchvision
import os
import json
import pathlib
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from PIL import Image

In [2]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /home/jason/.cache/torch/hub/v0.10.0.zip


In [3]:
num_classes = 8

print(model.classifier)

model.classifier[1] = torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=num_classes)
print(model.classifier)

Sequential(
  (0): Dropout(p=0.2, inplace=False)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)
Sequential(
  (0): Dropout(p=0.2, inplace=False)
  (1): Linear(in_features=1280, out_features=10, bias=True)
)


In [11]:
# clear directory

for folder in os.listdir(output_dir):
    dir = Path(output_dir) / Path(folder)
    if (str(dir)).find('.') == -1:
        for file in os.listdir(dir):
            fileDir = dir / Path(file)
            os.remove(fileDir)

In [12]:
output_dir = "./plastic_data/"
input_dir = "./seven_plastics/"

# Create output directories for train, eval, and test sets
train_dir = os.path.join(output_dir, 'train')
eval_dir = os.path.join(output_dir, 'eval')
test_dir = os.path.join(output_dir, 'test')

labelNumberDict = {"1_polyethylene_PET":1, "2_high_density_polyethylene_PE-HD":2, "3_polyvinylchloride_PVC":3,
          "4_low_density_polyethylene_PE-LD":4, "5_polypropylene_PP":5, "6_polystyrene_PS":6, "7_other_resins":7, 
          "8_no_plastic":8}

train_size = 0.7
test_size=0.15
eval_size=0.15

label_dict = {}

os.makedirs(train_dir, exist_ok=True)
os.makedirs(eval_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Iterate through each class folder in the input directory
for class_name in os.listdir(input_dir):
    class_dir = os.path.join(input_dir, class_name)
    if os.path.isdir(class_dir):
        print(class_dir)
        # Get the list of images in the class folder
        # for img in os.listdir(class_dir):
        #     if img.endswith('.jpg') or img.endswith('.png'):
        images = [imgPth for imgPth in os.listdir(class_dir) if imgPth.endswith('.jpg') or imgPth.endswith('.png')]
        labels = [labelNumberDict[class_name]] * len(images)
        
        # Split the images into train, eval, and test sets
        train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=test_size+eval_size, random_state=42)
        eval_images, test_images, eval_labels, test_labels = train_test_split(test_images, test_labels, test_size=test_size/(test_size+eval_size), random_state=42)
        
        for imgPth in os.listdir(class_dir):
            if imgPth.endswith('.jpg') or imgPth.endswith('.png'):
                label_dict[imgPth] = labelNumberDict[class_name]
        
        for img in train_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(train_dir, img))
        for img in eval_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(eval_dir, img))
        for img in test_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(test_dir, img))
            
print(label_dict)

dictString = json.dumps(label_dict, indent=4)

with open(os.path.join(output_dir, 'labels.json'), 'w+') as f:
    f.write(dictString)
    


./seven_plastics/6_polystyrene_PS
./seven_plastics/8_no_plastic
./seven_plastics/7_other_resins
./seven_plastics/3_polyvinylchloride_PVC
./seven_plastics/5_polypropylene_PP
./seven_plastics/4_low_density_polyethylene_PE-LD
./seven_plastics/2_high_density_polyethylene_PE-HD
./seven_plastics/1_polyethylene_PET
{'30b14a3f-487a-4f19-80b1-f24ddb072c25.jpg': 6, 'IMG_7043.jpg': 6, 'PSPB6564.jpg': 6, 'MIQA3922.jpg': 6, 'PTAU0812.jpg': 6, 'IMG_6732.jpg': 6, 'IMG_E6426.jpg': 6, 'IMG_9723.jpg': 6, 'IMG_9722.jpg': 6, 'IMG_7259.jpg': 6, 'IMG_6488.jpg': 6, 'IMG_7228.jpg': 6, 'IMG_6425.jpg': 6, 'IMG_7261.jpg': 6, 'IMG_7229.jpg': 6, 'IMG_7266.jpg': 6, 'IMG_6724.jpg': 6, 'IMG_6837.jpg': 6, 'IMG_6248.jpg': 6, 'IMG_6426.jpg': 6, 'IMG_7344.jpg': 6, 'IMG_6490.jpg': 6, 'IMG_7331.jpg': 6, 'IMG_6491.jpg': 6, 'IMG_6565.jpg': 6, 'IMG_6493.jpg': 6, 'IMG_7267.jpg': 6, 'IMG_9692.jpg': 6, 'IMG_9691.jpg': 6, 'IMG_6535.jpg': 6, 'IMG_9724.jpg': 6, 'IMG_8047.jpg': 6, 'IMG_6765.jpg': 6, 'IMG_6338.jpg': 5, 'IMG_6525.jpg'

In [6]:
print(os.getcwd())

import pathlib
from pathlib import Path

myDir = Path(os.getcwd()) / "yes"

print(myDir)

/home/jason/RecyclingCodesProject
/home/jason/RecyclingCodesProject/yes


In [4]:
class RecyclingTrainDataset(Dataset):
    def __init__(self, setname):
        self.setname = setname
        assert setname in ['train', 'eval', 'test']
        
        overall_dataset_dir = Path(os.getcwd()) / 'plastic_data'
        self.selected_dataset_dir = overall_dataset_dir / setname
        
        #E.g. self.all_filenames = ['006.png','007.png','008.png'] when setname=='val'
        self.all_filenames = os.listdir(self.selected_dataset_dir)
        self.all_labels = json.load(overall_dataset_dir / Path('labels.json'))
    
    def __len__(self):
        """Return the total number of examples in this split, e.g. if
        self.setname=='train' then return the total number of examples
        in the training set"""
        return len(self.all_filenames)
        
    def __getitem__(self, idx):
        """Return the example at index [idx]. The example is a dict with keys
        'data' (value: Tensor for an RGB image) and 'label' (value: multi-hot
        vector as Torch tensor of gr truth class labels)."""
        selected_filename = self.all_filenames[idx]
        imagepil = Image.open(os.path.join(self.selected_dataset_dir,selected_filename)).convert('RGB')
        
        #convert image to Tensor and normalize
        image = transformImage(imagepil)
        
        #load label
        label = torch.Tensor(self.all_labels.loc[selected_filename,:].values)
        
        sample = {'data':image, #preprocessed image, for input into NN
                  'label':label,
                  'img_idx':idx}
        return sample
    

def transformImage(image):
    transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
    
    return transforms(image)

In [None]:
rtd = RecyclingTrainDataset('train')