In [5]:
import os
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet18
from PIL import Image

# Load pre-trained ResNet model and remove the top fully connected layer
base_model = resnet18(pretrained=True)
base_model = torch.nn.Sequential(*(list(base_model.children())[:-1]))

# Define image preprocessing
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), # the mean and std come from imagenet dataset
])

# Define dataset folder path
trainset = 'batch1_train'
valset = 'batch1_val'
testset = 'batch1_test'

# Create an empty list to store features
train_features = []
val_features = []
test_features = []

# Iterate through all image files in the dataset folder
for filename in sorted(os.listdir(trainset)):
    if filename.endswith('.jpg') or filename.endswith('.jpeg') or filename.endswith('.png'):
        # Construct image path
        image_path = os.path.join(trainset, filename)

        # Load image
        img = Image.open(image_path)

        # Preprocess image and add batch dimension
        img_tensor = preprocess(img).unsqueeze(0)

        # Set model to evaluation mode
        base_model.eval()

        # Use ResNet model for feature extraction
        with torch.no_grad():
            features = base_model(img_tensor)

        # Append features to the list
        train_features.append(features.squeeze())



# Iterate through all image files in the dataset folder
for filename in sorted(os.listdir(valset)):
    if filename.endswith('.jpg') or filename.endswith('.jpeg') or filename.endswith('.png'):
        # Construct image path
        image_path = os.path.join(valset, filename)

        # Load image
        img = Image.open(image_path)

        # Preprocess image and add batch dimension
        img_tensor = preprocess(img).unsqueeze(0)

        # Set model to evaluation mode
        base_model.eval()

        # Use ResNet model for feature extraction
        with torch.no_grad():
            features = base_model(img_tensor)

        # Append features to the list
        val_features.append(features.squeeze())

# Iterate through all image files in the dataset folder
for filename in sorted(os.listdir(testset)):
    if filename.endswith('.jpg') or filename.endswith('.jpeg') or filename.endswith('.png'):
        # Construct image path
        image_path = os.path.join(testset, filename)

        # Load image
        img = Image.open(image_path)

        # Preprocess image and add batch dimension
        img_tensor = preprocess(img).unsqueeze(0)

        # Set model to evaluation mode
        base_model.eval()

        # Use ResNet model for feature extraction
        with torch.no_grad():
            features = base_model(img_tensor)

        # Append features to the list
        test_features.append(features.squeeze())

# we have 699 image, each image has 512 features
# print(train_features[1119].shape)
# print(val_features[69].shape)
# print(test_features[69].shape)

train_features = [feature.tolist() for feature in train_features]
val_features = [feature.tolist() for feature in val_features]
test_featurs = [feature.tolist() for feature in test_features]



[1.9368864297866821, 0.6377246379852295, 0.5309012532234192, 0.464469313621521, 0.12022065371274948, 1.1262922286987305, 1.3133279085159302, 0.19428984820842743, 0.6859062910079956, 0.9577176570892334, 1.07231605052948, 0.08937019109725952, 0.6679978370666504, 0.7329983711242676, 0.11596658080816269, 0.8502063155174255, 0.1575460582971573, 0.964133620262146, 0.8839544057846069, 0.14984974265098572, 4.367363929748535, 0.3618982136249542, 0.17469607293605804, 0.9499745965003967, 3.5728280544281006, 0.32371315360069275, 0.7395952343940735, 0.29348325729370117, 0.04847359657287598, 1.0548772811889648, 2.378603458404541, 0.12557417154312134, 0.1821325272321701, 1.0216805934906006, 0.2924525737762451, 1.5282306671142578, 0.2740168571472168, 1.571865439414978, 1.6476329565048218, 0.6612553596496582, 0.7745304703712463, 0.8709194660186768, 0.6343944072723389, 0.11610282212495804, 1.3654146194458008, 0.4953897297382355, 0.2722972631454468, 0.5749599933624268, 2.4747841358184814, 1.2285931110382

In [9]:
print(len(train_features[0]))
print(len(train_features[1119]))
print(len(train_features))


512
512
1120


In [23]:
import os
import pandas as pd

# Load label.csv file
label_df = pd.read_csv('batch1label.csv')
labelname = "Firmness"

# Initialize lists to store labels
train_labels = []
val_labels = []
test_labels = []

# Iterate through all image files in the dataset folder
for filename in sorted(os.listdir(trainset)):
    if filename.endswith('.jpg'):
        # Extract filename without extension
        filename_without_extension = os.path.splitext(filename.replace("original_", ""))[0]
        # print("Filename without extension:", filename_without_extension)
        print(label_df)


        # Extract label from label.csv based on filename
        label = label_df[label_df["Image_name"] == filename_without_extension][labelname].iloc[0]
        train_labels.append(label)

for filename in sorted(os.listdir(valset)):
    if filename.endswith('.jpg'):
        # Extract filename without extension
        filename_without_extension = os.path.splitext(filename)[0]
        
        # Extract label from label.csv based on filename
        label = label_df[label_df["Image_name"] == filename_without_extension][labelname].iloc[0]
        val_labels.append(label)

for filename in sorted(os.listdir(testset)):
    if filename.endswith('.jpg'):
        # Extract filename without extension
        filename_without_extension = os.path.splitext(filename)[0]
        
        # Extract label from label.csv based on filename
        label = label_df[label_df["Image_name"] == filename_without_extension][labelname].iloc[0]
        test_labels.append(label)

#batch1 1120 70 70
#batch2 1118 70 70
#batch1+2 2238 140 140
#4cbatch1 800 50 50



                 Image_name  Storage_date  L*value  a*value  b*value  \
0    20240205_185051921_iOS             1  32.0658  -1.4975  11.1175   
1    20240205_185100187_iOS             1  32.0658  -1.4975  11.1175   
2    20240205_185226787_iOS             1  32.0658  -1.4975  11.1175   
3    20240205_185255626_iOS             1  32.0658  -1.4975  11.1175   
4    20240205_185354882_iOS             1  32.0658  -1.4975  11.1175   
..                      ...           ...      ...      ...      ...   
695  20240212_193130383_iOS             8  26.8650   1.8275   2.4450   
696  20240212_193137718_iOS             8  26.8650   1.8275   2.4450   
697  20240212_193144338_iOS             8  26.8650   1.8275   2.4450   
698  20240212_193150684_iOS             8  26.8650   1.8275   2.4450   
699  20240212_193157334_iOS             8  26.8650   1.8275   2.4450   

       delta_E  Firmness  Inner_image  Unnamed: 8  
0    62.173212   32.2687          0.0         NaN  
1    62.173212   32.2687       