In [1]:
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet50
from PIL import Image
import numpy as np
from tqdm import tqdm

# Build the ResNet-50 architecture without weights; they are loaded from the
# checkpoint fetched just below.
resnet50_model = resnet50()

# Download the checkpoint and copy its parameters into the model.
state_dict = torch.hub.load_state_dict_from_url("https://download.pytorch.org/models/resnet50-19c8e357.pth")
resnet50_model.load_state_dict(state_dict)

# Use CUDA when it is available, otherwise stay on the CPU. Both .to() and
# .eval() return the module itself, so they can be chained.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
resnet50_model.to(device).eval()

# Module-level store that the forward hooks in extract_features write into,
# keyed by hook name.
embeds = dict()

def extract_features(image_paths, image_size=(16, 16)):
    """Return one flat feature vector per image from ResNet-50's bottleneck convolutions.

    Forward hooks are attached to ``conv1``, ``conv2`` and ``conv3`` of every
    bottleneck in ``layer1`` .. ``layer4`` of the module-level
    ``resnet50_model``. Each image is resized, normalised and pushed through
    the model, and the captured activations are flattened and concatenated in
    the order the hooks fired.

    Args:
        image_paths: Iterable of paths to image files readable by PIL.
        image_size: Target size handed to ``transforms.Resize``. The default
            ``(16, 16)`` reproduces the original hard-coded value.

    Returns:
        ``np.ndarray`` with one row per image. For an empty ``image_paths``
        the result is an empty 1-D array.

    Side effects:
        The module-level ``embeds`` dict is cleared and then left holding the
        activations of the last processed image.
    """
    # The preprocessing pipeline is identical for every image, so build it once.
    preprocess = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    def get_activation(name):
        def hook(model, input, output):
            embeds[name] = output
        return hook

    # Drop anything a previous call (or other code) left in the shared dict so
    # stale activations cannot be concatenated into this run's features.
    embeds.clear()

    hooks = []
    features = []
    try:
        # Register the hooks once for the whole batch instead of once per image.
        for name, layer in resnet50_model.named_children():
            if name in ['layer1', 'layer2', 'layer3', 'layer4']:
                for i, bottleneck in enumerate(layer.children()):
                    for conv_name in ('conv1', 'conv2', 'conv3'):
                        conv = getattr(bottleneck, conv_name)
                        hooks.append(
                            conv.register_forward_hook(get_activation(f'{conv_name}_{name}_{i}'))
                        )

        for path in tqdm(image_paths, desc="Extracting features", unit="image"):
            # Close the file handle promptly and force three channels so
            # grayscale / RGBA / palette images match the 3-channel Normalize.
            with Image.open(path) as img:
                image = img.convert("RGB")
            image_tensor = preprocess(image).unsqueeze(0).to(device)

            with torch.no_grad():
                resnet50_model(image_tensor)

            # Move each activation to the CPU and join them as arrays; this
            # avoids building a Python list one scalar at a time.
            features.append(
                np.concatenate([activation.cpu().numpy().ravel() for activation in embeds.values()])
            )
    finally:
        # Always detach the hooks, even if an image fails to load or the
        # forward pass raises, so the global model is left clean.
        for hook in hooks:
            hook.remove()

    return np.array(features)



In [2]:

import os

image_folder = "Data/Images/"

# os.listdir returns entries in arbitrary order. Sorting keeps row i of the
# feature matrix (and every later label) tied to the same file on every run
# and on every machine.
image_files = sorted(
    os.path.join(image_folder, file)
    for file in os.listdir(image_folder)
    if file.endswith(('.jpg', '.jpeg', '.png', '.bmp'))
)

# One feature row per path, in the same order as image_files.
image_features = extract_features(image_files)

image_features.shape

Extracting features:   0%|          | 0/9737 [00:00<?, ?image/s]

Extracting features: 100%|██████████| 9737/9737 [03:02<00:00, 53.44image/s]


(9737, 51456)

In [3]:
import pandas as pd
# Single-column table holding the image paths, one row per entry of image_files.
df = pd.DataFrame({'Images Address': image_files})

In [4]:
# Show the table of image paths; the rendered output abbreviates the middle rows.
df

Unnamed: 0,Images Address
0,Data/Images/Mazda-2000 (1).jpg
1,Data/Images/Mazda-2000 (10).jpg
2,Data/Images/Mazda-2000 (100).jpg
3,Data/Images/Mazda-2000 (101).jpg
4,Data/Images/Mazda-2000 (102).jpg
...,...
9732,Data/Images/Tiba2 (95).jpg
9733,Data/Images/Tiba2 (96).jpg
9734,Data/Images/Tiba2 (97).jpg
9735,Data/Images/Tiba2 (98).jpg


In [5]:
# Preview only the first five image paths.
df.head()

Unnamed: 0,Images Address
0,Data/Images/Mazda-2000 (1).jpg
1,Data/Images/Mazda-2000 (10).jpg
2,Data/Images/Mazda-2000 (100).jpg
3,Data/Images/Mazda-2000 (101).jpg
4,Data/Images/Mazda-2000 (102).jpg


In [6]:


# Second name for the feature matrix returned by extract_features. This is a
# plain assignment, so both names refer to the same array and nothing is copied.
loaded_data = image_features




In [7]:
# Sanity check: one row per image, one column per flattened activation value.
loaded_data.shape


(9737, 51456)

In [8]:
# Length of a single image's feature vector.
loaded_data[0].shape

(51456,)

In [9]:
from sklearn.decomposition import PCA
import plotly.express as px



# Project the activation vectors onto their first three principal components
# so they can be inspected in a 3D plot.
# random_state is pinned because, per scikit-learn's documented 'auto' solver
# policy, a matrix this large with only three components is decomposed with
# randomized SVD, which otherwise gives slightly different coordinates per run.
pca = PCA(n_components=3, random_state=0)
reduced_data = pca.fit_transform(loaded_data)

import pandas as pd
# NOTE: this rebinds `df`, which earlier held the image-path table.
df = pd.DataFrame(reduced_data, columns=['Component 1', 'Component 2', 'Component 3'])

fig = px.scatter_3d(df, x='Component 1', y='Component 2', z='Component 3', title="3D Scatter Plot of Reduced Features")
fig.show()


In [10]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import plotly.express as px
import pandas as pd
from tqdm import tqdm


class PCAWithProgress(PCA):
    """PCA variant that reports ``fit_transform`` through a one-step tqdm bar.

    The bar has a single tick, so it only signals that the decomposition has
    started and finished, along with the elapsed time.
    """

    def fit_transform(self, X, y=None):
        """Run the parent ``fit_transform`` inside a progress bar and return its result."""
        progress = tqdm(total=1, desc="PCA")
        with progress:
            projected = super().fit_transform(X, y)
            progress.update(1)
        return projected

# Seed the decomposition: with this matrix size and three components,
# scikit-learn's default 'auto' solver is the randomized one, so an unseeded
# run yields slightly different coordinates each time.
pca = PCAWithProgress(n_components=3, random_state=0)
reduced_data = pca.fit_transform(loaded_data)

class KMeansWithProgress(KMeans):
    """KMeans variant that reports ``fit`` through a one-step tqdm bar.

    The bar has a single tick, so it only signals that clustering has started
    and finished, along with the elapsed time.
    """

    def fit(self, X, y=None, sample_weight=None):
        """Fit the clustering inside a progress bar.

        Args:
            X: Training data, passed to ``KMeans.fit`` unchanged.
            y: Ignored by KMeans; accepted for API compatibility.
            sample_weight: Optional per-sample weights. The parameter must
                exist on the override because the inherited ``fit_predict``
                and ``fit_transform`` call ``self.fit(X, sample_weight=...)``
                and would otherwise raise ``TypeError``.

        Returns:
            The fitted estimator, as returned by ``KMeans.fit``.
        """
        with tqdm(total=1, desc="KMeans") as pbar:
            result = super().fit(X, y, sample_weight=sample_weight)
            pbar.update(1)
        return result

# n_init='auto' with the default k-means++ initialisation performs a single
# run, so the partition depends entirely on the seed. Pin it so the labels
# (and anything exported from them) are the same on every execution.
kmeans = KMeansWithProgress(n_clusters=13, n_init='auto', random_state=0)
kmeans.fit(loaded_data)
labels = kmeans.labels_

# Clustering ran on the full feature matrix; the PCA coordinates are used
# only to place each point in the plot.
df = pd.DataFrame(reduced_data, columns=['Component 1', 'Component 2', 'Component 3'])
df['Cluster'] = labels

fig = px.scatter_3d(df, x='Component 1', y='Component 2', z='Component 3', color='Cluster', title="3D Scatter Plot of Clustered Features")
fig.show()



PCA: 100%|██████████| 1/1 [00:06<00:00,  6.39s/it]
KMeans: 100%|██████████| 1/1 [00:50<00:00, 50.00s/it]


In [11]:
# Cluster index KMeans assigned to each image, in the same order as image_files.
labels

array([2, 2, 3, ..., 0, 7, 9])

In [13]:

# Pair every image path with the cluster it was assigned to; both sequences
# are in the same order.
DataFrame = pd.DataFrame({'File Address': image_files}).assign(Label=labels)




In [14]:
# Persist the path -> cluster assignment. The default index column is kept so
# the layout of the written file is unchanged.
DataFrame.to_csv('labeledImages.csv')

# The preview goes last: Jupyter renders only the final expression of a cell,
# so calling head() before to_csv() displayed nothing.
DataFrame.head()