**Importing required packages**

In [34]:
import torch
import torchvision
import numpy as np
from PIL import Image
import requests
from torchvision import models
from torchvision import transforms

**Loading the indoor/outdoor (IO) mapping from the Places365 text file**

In [35]:
# Parse the indoor/outdoor annotation file: one entry per non-empty line, with
# the numeric label taken from the last whitespace-separated token.
# Blank lines (e.g. a stray empty line at the end of the file) are skipped
# instead of raising IndexError, and reading the last token keeps the parse
# independent of how many tokens precede the label.
with open('IO_places365.txt') as f:
    io_mapping = np.array(
        [int(line.split()[-1]) for line in f if line.strip()],
        dtype=int,
    )

# Binary view used by classify_scene_single: 1 where the raw label equals 2
# (summed as "outdoor" there), 0 for every other label (summed as "indoor").
io_mapping_binary = (io_mapping == 2).astype(int)

*The IO mapping labels each Places365 category as indoor or outdoor; in the code above, label 2 is treated as outdoor and every other label as indoor.*

**Loading the pretrained AlexNet model (Places365 weights)**

In [36]:
# Build the AlexNet architecture with 365 output classes.
model = models.alexnet(num_classes=365)

# Fetch the Places365 checkpoint, mapping every tensor onto the CPU.
checkpoint = torch.hub.load_state_dict_from_url(
    url='http://places2.csail.mit.edu/models_places365/alexnet_places365.pth.tar',
    map_location=torch.device('cpu'),
)

# Remove every 'module.' fragment from the parameter names (wherever it occurs
# in the key, not only as a prefix) so the keys match those expected by `model`.
state_dict = dict(
    (name.replace('module.', ''), tensor)
    for name, tensor in checkpoint['state_dict'].items()
)
model.load_state_dict(state_dict)

# Put the network in evaluation mode; as the cell's last expression this also
# displays the model structure.
model.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [37]:
# Input pipeline applied to every image before it is handed to the network.
preprocess = transforms.Compose(
    [
        # Make all images 256x256 pixels.
        transforms.Resize(size=(256, 256)),
        # Take the central 224x224 crop used as the AlexNet input.
        transforms.CenterCrop(size=224),
        # Convert the image to a PyTorch tensor and scale pixels to 0 -> 1.
        transforms.ToTensor(),
        # Standardize channels to match the pretrained model's expected distribution.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

**Bonus: Extending the model to predict scene categories**

In [38]:
# Collect one category name per line of the categories file.
# For each line, keep only the first space-separated field and drop its first
# three characters (presumably a "/x/"-style prefix; confirm against the file).
# Anything after the first space on the line is discarded.
categories = []
with open('categories_places365.txt') as f:
    categories.extend(entry.strip().split(' ')[0][3:] for entry in f)

**Defining the function that classifies each picture**

In [39]:
def classify_scene_single(img_path):
    """Classify one image and label the scene as indoor or outdoor.

    Relies on the notebook-level ``preprocess``, ``model``, ``categories`` and
    ``io_mapping_binary`` objects defined in earlier cells.

    Parameters
    ----------
    img_path
        Anything accepted by ``PIL.Image.open`` (here: a path to an image file).

    Returns
    -------
    tuple
        ``(top_scene, confidence, io_label)``:

        * ``top_scene`` -- entry of ``categories`` with the highest softmax
          probability.
        * ``confidence`` -- that probability expressed as a percentage.
        * ``io_label`` -- ``"Indoor"`` when the summed probability of the
          classes flagged 0 in ``io_mapping_binary`` is strictly greater than
          that of the classes flagged 1, otherwise ``"Outdoor"`` (ties
          therefore resolve to ``"Outdoor"``).
    """
    # Open the image in a context manager so the underlying file handle is
    # always released, even if decoding or preprocessing raises.
    with Image.open(img_path) as img:
        # Add a leading batch dimension of size 1 for the model.
        input_tensor = preprocess(img.convert('RGB')).unsqueeze(0)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logit = model(input_tensor)
        probs = torch.nn.functional.softmax(logit, dim=1).cpu().numpy()[0]

    # Most probable category
    top_idx = probs.argmax()
    top_scene = categories[top_idx]
    confidence = probs[top_idx] * 100

    # Indoor/Outdoor: compare the total probability mass of each group.
    indoor_prob = np.sum(probs[io_mapping_binary == 0])
    outdoor_prob = np.sum(probs[io_mapping_binary == 1])
    io_label = "Indoor" if indoor_prob > outdoor_prob else "Outdoor"

    return top_scene, confidence, io_label

**Calling the function**

In [40]:
# Image to classify; point this at a different file to try another picture.
img_path = 'trial/outdoor_1.jpg'

# Run the classifier and unpack its three results.
scene, confidence, io_label = classify_scene_single(img_path)

# Report one result per line.
print(
    f"Prediction: {scene}",
    f"Confidence: {confidence:.2f}%",
    f"Indoor/Outdoor: {io_label}",
    sep="\n",
)

Prediction: volcano
Confidence: 38.59%
Indoor/Outdoor: Outdoor


*Set **img_path** to the image you want to classify.*

I have indoor_1.jpg, indoor_2.jpg, outdoor_1.jpg and outdoor_2.jpg as trial inputs.