In [4]:
# ! uv add joblib scikit-learn matplotlib

In [17]:
import os
import torch
import json
import joblib
import numpy as np
from PIL import Image
import torch.nn as nn
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from torchvision import transforms
from efficientnet_pytorch import EfficientNet
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [6]:
# Pick the compute device: the GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the pretrained EfficientNet-B5 backbone, move it to the chosen device
# and switch it to eval mode. The trailing ';' keeps the module repr out of
# the cell output.
model = EfficientNet.from_pretrained('efficientnet-b5')
model = model.to(device)
model.eval();

Loaded pretrained weights for efficientnet-b5


In [7]:
# Freeze the backbone: no parameter of `model` should receive gradients.
# The trailing ';' suppresses the module repr that requires_grad_ returns.
model.requires_grad_(False);

In [8]:
# Preprocessing applied to every image before it is fed to the backbone:
# fixed-size resize -> tensor conversion -> per-channel normalisation with the
# standard ImageNet mean/std.
# NOTE(review): 380x380 is the resolution usually quoted for EfficientNet-B4;
# B5 is normally listed at 456x456 — confirm that 380 is intentional.
preprocessing_steps = [
    transforms.Resize((380, 380)),
    transforms.ToTensor(),
    transforms.Normalize(
        std=[0.229, 0.224, 0.225],
        mean=[0.485, 0.456, 0.406],
    ),
]
transform = transforms.Compose(preprocessing_steps)

In [9]:
# Images to embed. The list is sorted because directory listing order is
# filesystem-dependent; a stable order keeps the feature matrix (and the
# seeded train/test split built from it) reproducible across machines.
image_paths = sorted(Path('car-img-hr').glob('*.jpg'))
batch_size = 32
all_features = []  # re-initialised right before the extraction loop


def _shoot_category(task):
    """Return the first 'shoot_category' choice of a task's first annotation.

    Args:
        task: one task dict from the annotation export; must have a non-empty
            'annotations' list whose first entry carries a 'result' list.

    Returns:
        The first choice string of the result item whose 'from_name' is
        'shoot_category', or None when the annotation has no such item.
    """
    for item in task['annotations'][0]['result']:
        if item.get('from_name') == 'shoot_category':
            return item['value']['choices'][0]
    return None


# Annotation export (presumably from Label Studio — confirm). JSON is UTF-8,
# so the encoding is stated instead of relying on the platform default.
with open('project-6-at-2025-11-18-10-24-3493761b.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One record per annotated task; tasks with an empty 'annotations' list are
# dropped. 'shoot_category' is None when the annotation lacks that field.
result = [
    {'image': task['file_upload'], 'shoot_category': _shoot_category(task)}
    for task in data
    if task['annotations']
]

result[0]

{'image': '2aef963a-6866a00bc97fc6a75b49511b_slot_images_dicky_door_open_view.jpg',
 'shoot_category': 'interior'}

In [10]:
import re
import random  # no longer used in this cell; kept in case later cells rely on it


def _load_rgb_tensor(path):
    """Open an image, convert it to RGB and apply `transform`.

    The file is opened in a `with` block so its handle is closed as soon as
    the pixels have been read, instead of lingering until garbage collection.
    """
    with Image.open(path) as img:
        return transform(img.convert('RGB'))


# Map original file name -> annotated category. The annotation export
# prefixes uploaded files with a hex id followed by '-', which is stripped so
# the names match the files on disk.
result_dict = {}
for res in result:
    label_studio_filename = os.path.basename(res["image"])
    actual_filename = re.sub(r'^[a-f0-9]+-', '', label_studio_filename)
    result_dict[actual_filename] = res["shoot_category"]

# Keep only images that have a usable annotation. Images without a match (or
# whose category is None) are skipped rather than given a random label:
# made-up labels silently corrupt both training and evaluation.
labeled_paths = []
labeled_targets = []
for path in image_paths:
    filename = os.path.basename(path)
    category = result_dict.get(filename)
    if category is None:
        print(f"Warning: No label for {filename}, skipping image")
        continue
    labeled_paths.append(path)
    # Binary target: 0 = exterior, 1 = any other category.
    labeled_targets.append(0 if category.lower() == 'exterior' else 1)

if not labeled_paths:
    # np.vstack([]) would fail below with a far less helpful message.
    raise ValueError(
        "No labelled images found: check the image directory and the annotation export"
    )

all_features = []
all_labels = []

for i in range(0, len(labeled_paths), batch_size):
    batch_paths = labeled_paths[i:i+batch_size]
    batch = torch.stack([_load_rgb_tensor(p) for p in batch_paths]).to(device)

    with torch.no_grad():
        features = model.extract_features(batch)
        # Global average pool to 1x1, then drop the two spatial dimensions so
        # each image is represented by a single feature vector.
        features = torch.nn.functional.adaptive_avg_pool2d(features, 1).squeeze(-1).squeeze(-1)

    all_features.append(features.cpu().numpy())

    current_labels = labeled_targets[i:i+batch_size]
    all_labels.extend(current_labels)
    print(f"Batch labels: {current_labels}")


features_array = np.vstack(all_features)
labels_array = np.array(all_labels)

skipped_count = len(image_paths) - len(labeled_paths)
if skipped_count:
    print(f"Skipped {skipped_count} unlabelled image(s)")

print(f"\nTotal images: {len(image_paths)}")
print(f"Features shape: {features_array.shape}")
print(f"Labels shape: {labels_array.shape}")

Batch labels: [0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0]
Batch labels: [0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0]

Total images: 57
Features shape: (57, 2048)
Labels shape: (57,)


In [11]:
# Sanity check: one row per embedded image, one column per pooled feature.
features_array.shape

(57, 2048)

In [12]:
# Hold out 20% of the samples for testing. The split is seeded for
# repeatability and stratified so both parts keep the same label ratio.
X_train, X_test, y_train, y_test = train_test_split(
    features_array, labels_array,
    stratify=labels_array,
    test_size=0.2,
    random_state=42,
)

# Report the shapes of each (features, labels) pair: train first, then test.
for split_X, split_y in ((X_train, y_train), (X_test, y_test)):
    print(split_X.shape, split_y.shape)

(45, 2048) (45,)
(12, 2048) (12,)


In [15]:
# Copy the NumPy splits into torch tensors: float32 features, int64 labels.
# NOTE(review): SimpleNNHead emits a single output per sample; a loss such as
# BCEWithLogitsLoss would need float targets shaped like that output —
# confirm which loss the training loop uses with these long-typed labels.
X_train_torch, X_test_torch = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_test)
)
y_train_torch, y_test_torch = (
    torch.tensor(split, dtype=torch.long) for split in (y_train, y_test)
)

In [18]:
class SimpleNNHead(nn.Module):
    """Small MLP head: Linear -> ReLU -> Linear, with no final activation.

    Args:
        input_dim: size of each input feature vector.
        hidden_dim: width of the hidden layer (default 128).
        output_dim: number of raw outputs per sample (default 1).

    The layers live in `self.net` (an `nn.Sequential`), so state-dict keys are
    `net.0.*` for the first linear layer and `net.2.*` for the second.
    """

    def __init__(self, input_dim, hidden_dim=128, output_dim=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        """Return the raw (un-activated) outputs of the head for input `x`."""
        return self.net(x)


In [13]:
# DISABLED CELL: 2-D t-SNE scatter plot of `features_array`, coloured by
# `labels_array` (0 = Exterior, 1 = Interior); it would also write
# 'tsne_embeddings.png'. Uncomment the lines below to run it.
# NOTE(review): `max_iter` is the newer name of TSNE's iteration argument;
# older scikit-learn releases call it `n_iter` — confirm the installed version.
# tsne = TSNE(n_components=2, random_state=42, perplexity=30, max_iter=1000)
# embeddings_2d = tsne.fit_transform(features_array)

# # Create visualization
# plt.figure(figsize=(10, 8))
# colors = ['blue', 'red']
# labels = ['Exterior', 'Interior']

# for i in range(2):
#     mask = labels_array == i
#     plt.scatter(embeddings_2d[mask, 0], embeddings_2d[mask, 1], 
#                 c=colors[i], label=labels[i], alpha=0.6, s=100)

# plt.xlabel('t-SNE Dimension 1', fontsize=12)
# plt.ylabel('t-SNE Dimension 2', fontsize=12)
# plt.title('t-SNE Visualization of EfficientNet-B5 Embeddings', fontsize=14)
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# plt.savefig('tsne_embeddings.png', dpi=300)
# plt.show()
