In [3]:
#read directories inside a parent
parent_dir = "./capture/train/"
import os
import pandas as pd

#read all files in a directory
def read_files_in_dir(dir):
    files = os.listdir(dir)
    files = [dir + file for file in files]
    return files

print("Reading files in directory: ", parent_dir)
files = read_files_in_dir(parent_dir)
print("Number of files: ", len(files))

Reading files in directory:  ./capture/train/
Number of files:  2100


In [21]:
df = pd.DataFrame()
data = []
for file in files:
    x = file.split('/')[-1].split('_')[0].split(",")[0][1:]
    y = file.split('/')[-1].split('_')[0].split(",")[1][1:]
    z = file.split('/')[-1].split('_')[0].split(",")[2][1:-1]
    angle = file.split('/')[-1].split('_')[2].split(".")[0]
    height = file.split('/')[-1].split('_')[1]
    filename = file.split('/')[-1]
    data.append({'x': x, 'y': y, 'z': z, 'angle': angle, 'height': height, 'filename':filename})

df = pd.concat([df, pd.DataFrame(data)], axis=1)
df.to_pickle("train.pkl")

In [4]:
df = pd.read_pickle("train.pkl")
df

Unnamed: 0,x,y,z,angle,height,filename
0,-1.55,0.00,-1.36,0,1.2,"(-1.55, 0.00, -1.36)_1.2_0.png"
1,-1.55,0.00,-1.36,120,1.2,"(-1.55, 0.00, -1.36)_1.2_120.png"
2,-1.55,0.00,-1.36,150,1.2,"(-1.55, 0.00, -1.36)_1.2_150.png"
3,-1.55,0.00,-1.36,180,1.2,"(-1.55, 0.00, -1.36)_1.2_180.png"
4,-1.55,0.00,-1.36,210,1.2,"(-1.55, 0.00, -1.36)_1.2_210.png"
...,...,...,...,...,...,...
2095,2.47,0.00,6.90,30,1,"(2.47, 0.00, 6.90)_1_30.png"
2096,2.47,0.00,6.90,300,1,"(2.47, 0.00, 6.90)_1_300.png"
2097,2.47,0.00,6.90,330,1,"(2.47, 0.00, 6.90)_1_330.png"
2098,2.47,0.00,6.90,60,1,"(2.47, 0.00, 6.90)_1_60.png"


In [7]:
import pandas as pd
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")


In [8]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        """
        Args:
            dataframe (pandas.DataFrame): DataFrame containing image paths and labels.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.dataframe.iloc[idx, 5])
        image = Image.open(img_name)
        labels = self.dataframe.iloc[idx, [0,2]].astype('float').to_numpy()

        if self.transform:
            image = self.transform(image)

        return image, labels
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [9]:
# Assuming df is your DataFrame
dataset = CustomDataset(dataframe=df, root_dir='./capture/train/', transform=transform)
dataloader = DataLoader(dataset, batch_size=100, shuffle=True)


In [10]:
dataloader.dataset[0][0].shape

torch.Size([3, 256, 256])

In [11]:
model = models.resnet50(pretrained=True)
model = model.to(device)

# Replace the last fully connected layer
# Number of features depends on the model architecture
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # x, y, z, angle, height
model = model.to(device)




In [12]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [13]:
num_epochs = 20  # You can adjust this
for epoch in range(num_epochs):
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device).float()
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')


Epoch 1/20, Loss: 2.516969680786133
Epoch 2/20, Loss: 1.892028570175171
Epoch 3/20, Loss: 2.94075608253479
Epoch 4/20, Loss: 2.542177677154541
Epoch 5/20, Loss: 1.052096962928772
Epoch 6/20, Loss: 1.047052025794983
Epoch 7/20, Loss: 0.8226905465126038
Epoch 8/20, Loss: 1.5843334197998047
Epoch 9/20, Loss: 1.080967664718628
Epoch 10/20, Loss: 0.819031298160553
Epoch 11/20, Loss: 0.6723946332931519
Epoch 12/20, Loss: 0.5550655126571655
Epoch 13/20, Loss: 0.34133902192115784
Epoch 14/20, Loss: 0.7011778950691223
Epoch 15/20, Loss: 0.32363489270210266
Epoch 16/20, Loss: 0.6057819724082947
Epoch 17/20, Loss: 0.40040308237075806
Epoch 18/20, Loss: 0.4121708273887634
Epoch 19/20, Loss: 0.38906943798065186
Epoch 20/20, Loss: 0.5810004472732544


In [90]:
#free up the gpu
torch.cuda.empty_cache()
#free up dedicated memory
del model

In [16]:

model.eval()  # Set the model to evaluation mode

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def load_image(image_path):
    image = Image.open(image_path)
    image = transform(image).float()
    image = image.unsqueeze(0)  # Add batch dimension
    return image




In [24]:
image = load_image('./capture/testCapture/IMG_7131_94.jpg')
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
image = image.to(device)
with torch.no_grad():
    prediction = model(image)
    prediction = prediction.cpu().numpy()[0]
print(f"Predicted camera parameters: {prediction}")

Predicted camera parameters: [-4.859378 -5.093643]
