# HW09

### Setup

Setting everything up

In [1]:
%pip install --upgrade Pillow pandas torch transformers

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
#Import libraries
from PIL import Image
from transformers import pipeline
from transformers import ViTImageProcessor, ViTForImageClassification
from pathlib import Path
import torch
import pandas as pd

### Model One

### Load Model One

Loading my Hugging Face Model

In [3]:
# Load model and processor
DIY_MODEL = "merelevy/diy-recommendation2"
processor = ViTImageProcessor.from_pretrained(DIY_MODEL)
model = ViTForImageClassification.from_pretrained(DIY_MODEL)

preprocessor_config.json:   0%|          | 0.00/353 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/916 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/343M [00:00<?, ?B/s]

### Update the Labels

Updating the labels to match DIY recommendations.

In [5]:
print(model.config.id2label)
model.config.id2label = {
    0: "Door, DIY",
    1: "Electrical, Pro",
    2: "Gas lines, Pro",
    3: "Pests, try DIY",
    4: "Sink, DIY"
}

# Also update label2id (reverse mapping)
model.config.label2id = {v: k for k, v in model.config.id2label.items()}

print(model.config.id2label)


{0: 'doors and door knobs', 1: 'electrical wiring', 2: 'gas line', 3: 'insects', 4: 'sink and faucet'}
{0: 'Door, DIY', 1: 'Electrical, Pro', 2: 'Gas lines, Pro', 3: 'Pests, try DIY', 4: 'Sink, DIY'}


### Test Zero
Running the model on the first image, to ensure accuracy

#### Uploading an Image
Uploading the first test image

In [6]:
# Define the images path
image_folder = Path("./TestImage0/")
image_files = list(image_folder.glob("*.*"))
print("Found images:", image_files)

# Check the image is really there
image_path = image_files[0]
image = Image.open(image_path)
print(type(image))

Found images: [PosixPath('TestImage0/sink0.png')]
<class 'PIL.PngImagePlugin.PngImageFile'>


#### Running the Model
Running the model on the first image, to ensure accuracy

In [7]:
# Open the image
image = Image.open(image_path)

# RGB format
if image.mode != "RGB":
    image = image.convert("RGB")

# Processing
inputs = processor(images=image, return_tensors="pt")

# Inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predicted class index
logits = outputs.logits
predicted_class = logits.argmax(-1).item()

# Labels
labels = model.config.id2label[predicted_class]
print("Recommended Advice:", labels)


Recommended Advice: Sink, DIY


### Defining the Image Processing Function

In [8]:
def process_images(folder_path):
    folder = Path(folder_path)
    image_files = list(folder.glob("*.*"))

    results = []

    for image_path in image_files:
        
        image = Image.open(image_path)

        if image.mode != "RGB":
            image = image.convert("RGB")
        inputs = processor(images=image, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            predicted_class = logits.argmax(-1).item()
        
        label = model.config.id2label[predicted_class]

        results.append((image_path.name, label))

    return pd.DataFrame(results, columns=["Image Name", "Predicted Label"])


### Test One
In this test, I test black and white vs. color capabilities and JPG, PNG, WEBPB and HEIC file types.

In [20]:
process_images("./TestImages1/")

Unnamed: 0,Image Name,Predicted Label
0,doorknobJPEG.jpg,"Door, DIY"
1,doorknobgrayPNG.png,"Door, DIY"
2,wiring.webp,"Electrical, Pro"
3,doorknobgrayJPG.jpg,"Door, DIY"
4,doorknobPNG.png,"Door, DIY"


### Test Two
In this test, I investigate how deterministic my model is, by repeating the same input 5 times.

In [11]:
process_images("./TestImages2/")

Unnamed: 0,Image Name,Predicted Label
0,doorknob2.png,"Door, DIY"
1,doorknob5.png,"Door, DIY"
2,doorknob4.png,"Door, DIY"
3,doorknob3.png,"Door, DIY"
4,doorknob1.png,"Door, DIY"


### Test Three
In this test, I investigate how well my model can recognize my 5 different use cases across 25 tests.

In [14]:
process_images("./TestImages3/")

Unnamed: 0,Image Name,Predicted Label
0,Door3.jpg,"Door, DIY"
1,Door4.jpg,"Sink, DIY"
2,Door5.jpg,"Sink, DIY"
3,Door2.jpeg,"Door, DIY"
4,Door1.jpeg,"Door, DIY"


In [15]:
process_images("./TestImages3.1/")

Unnamed: 0,Image Name,Predicted Label
0,Electrical2.jpg,"Electrical, Pro"
1,Electrical3.jpg,"Electrical, Pro"
2,Electrical1.jpg,"Electrical, Pro"
3,Electrical4.png,"Electrical, Pro"
4,Electrical5.png,"Electrical, Pro"


In [16]:
process_images("./TestImages3.2/")

Unnamed: 0,Image Name,Predicted Label
0,Faucet1.jpeg,"Sink, DIY"
1,Faucet3.jpg,"Sink, DIY"
2,Faucet4.jpg.jpeg,"Sink, DIY"
3,Faucet2.jpeg,"Sink, DIY"
4,Faucet5.jpg,"Sink, DIY"


In [17]:
process_images("./TestImages3.3/")

Unnamed: 0,Image Name,Predicted Label
0,Gas2.jpeg,"Door, DIY"
1,Gas3.jpg,"Electrical, Pro"
2,Gas4.jpg,"Electrical, Pro"
3,Gas1.jpeg,"Electrical, Pro"
4,Gas5.jpeg,"Electrical, Pro"


In [18]:
process_images("./TestImages3.4/")

Unnamed: 0,Image Name,Predicted Label
0,pests1.jpeg,"Pests, try DIY"
1,pests3.jpg,"Pests, try DIY"
2,pests5.jpg,"Pests, try DIY"
3,pests4.jpeg,"Pests, try DIY"
4,pests2.jpg,"Pests, try DIY"


### Test Four
In this test, I investigate how well my model reacts to multi-modal environments, with multiple potential classifications for each image.

In [21]:
process_images("./TestImages4/")

Unnamed: 0,Image Name,Predicted Label
0,kitchen-faucet-doors.jpg,"Sink, DIY"
1,faucet-pest.jpg,"Pests, try DIY"
2,wiring.webp,"Electrical, Pro"
3,bathroom-faucet.jpeg,"Sink, DIY"
4,gas-fireplace.jpeg,"Sink, DIY"


### Test Five
In this test, I investigate how my model responds to more intense variations of the images it has been trained to handle.

In [22]:
process_images("./TestImages5/")

Unnamed: 0,Image Name,Predicted Label
0,burning-house.jpeg,"Gas lines, Pro"
1,giant.bug.png,"Sink, DIY"
2,waterfall.jpg,"Gas lines, Pro"
3,multiple-buildings.jpeg,"Gas lines, Pro"
4,chandelier-art.jpg,"Door, DIY"


### Model Two

### Loading Model Two
Loading my Hugging Face Model

In [30]:
# Load model and processor
ACCESSIBLE_MODEL = "merelevy/environmental-accessibility"
processor = ViTImageProcessor.from_pretrained(ACCESSIBLE_MODEL)
model = ViTForImageClassification.from_pretrained(ACCESSIBLE_MODEL)

preprocessor_config.json:   0%|          | 0.00/353 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/848 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/343M [00:00<?, ?B/s]

### Update the Labels

Updating the labels to match accessibility recommendations.

In [34]:
print(model.config.id2label)
model.config.id2label = {
    0: "Accessible: Building Ramp",
    1: "Inaccessible: Room Sign, no Braille",
    2: "Accessible: Room Sign, Braille",
    3: "Inaccessible: Stairs"
}

# Also update label2id (reverse mapping)
model.config.label2id = {v: k for k, v in model.config.id2label.items()}

print(model.config.id2label)


{0: 'building ramp', 1: 'room signs', 2: 'sign with braille', 3: 'stairs'}
{0: 'Accessible: Building Ramp', 1: 'Inaccessible: Room Sign, no Braille', 2: 'Accessible: Room Sign, Braille', 3: 'Inaccessible: Stairs'}


### Test Six
Making sure everything is in working order with the second model.

In [35]:
# Define the images path
image_folder = Path("./TestImages6/")
image_files = list(image_folder.glob("*.*"))
print("Found images:", image_files)

# Check the image is really there
image_path = image_files[0]
image = Image.open(image_path)
print(type(image))

Found images: [PosixPath('TestImages6/stairs0.jpg')]
<class 'PIL.JpegImagePlugin.JpegImageFile'>


In [37]:
# Open the image
image = Image.open(image_path)

# RGB format
if image.mode != "RGB":
    image = image.convert("RGB")

# Processing
inputs = processor(images=image, return_tensors="pt")

# Inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predicted class index
logits = outputs.logits
predicted_class = logits.argmax(-1).item()

# Labels
labels = model.config.id2label[predicted_class]
print(labels)

Inaccessible: Stairs


### Test Seven
In this test, I investigate how well my model can recognize my basic accessibility cases.

In [44]:
process_images("./TestImages7/")

Unnamed: 0,Image Name,Predicted Label
0,braille5.jpg,"Accessible: Room Sign, Braille"
1,braille4.webp,"Accessible: Room Sign, Braille"
2,braille1.jpg,"Accessible: Room Sign, Braille"
3,braille2.jpg,"Accessible: Room Sign, Braille"
4,braille3.jpg,"Accessible: Room Sign, Braille"


In [45]:
process_images("./TestImages7.1/")

Unnamed: 0,Image Name,Predicted Label
0,sign5.webp,"Accessible: Room Sign, Braille"
1,sign3.webp,"Accessible: Room Sign, Braille"
2,sign1.jpg,"Accessible: Room Sign, Braille"
3,sign2.jpeg,"Accessible: Room Sign, Braille"
4,sign4.webp,"Accessible: Room Sign, Braille"


In [46]:
process_images("./TestImages7.2/")

Unnamed: 0,Image Name,Predicted Label
0,ramp1.jpeg,Accessible: Building Ramp
1,ramp3.webp,Accessible: Building Ramp
2,ramp2.webp,Inaccessible: Stairs
3,ramp4.jpg,"Inaccessible: Room Sign, no Braille"
4,ramp5.jpg,Inaccessible: Stairs


In [41]:
process_images("./TestImages7.3/")

Unnamed: 0,Image Name,Predicted Label
0,stairs5.jpg,Inaccessible: Stairs
1,stairs3.jpeg,Inaccessible: Stairs
2,stairs1.jpg,Inaccessible: Stairs
3,stairs4.jpg.webp,Inaccessible: Stairs
4,stairs2.jpg,Inaccessible: Stairs


### Test Eight
In this test, I investigate how well my model can more complex situations, with accessible and inaccessible features in richer environmental contexts.

In [43]:
process_images("./TestImages8/")

Unnamed: 0,Image Name,Predicted Label
0,nobraille.jpg,"Inaccessible: Room Sign, no Braille"
1,both1.jpg,Inaccessible: Stairs
2,braille.png,"Accessible: Room Sign, Braille"
3,both2.jpeg,Accessible: Building Ramp
4,both3.webp,Inaccessible: Stairs
