# <span style="color:red">**PinpointAI | Part II**</span>

## CSV Import from Data Collection

In [112]:
import pandas as pd

## Concatenate DataFrames

In [113]:
# Optional: write the combined DataFrame to a new CSV file, leaving out the index column.
# NOTE(review): no visible cell in this notebook defines `df_combined`; it has to exist
# in the kernel already for this cell to run — confirm where it is built.
combined_csv_path = '/Users/mbouch17/Desktop/Personal_Data_Project/PinpointAI/combined_coordinates.csv'
df_combined.to_csv(combined_csv_path, index=False)

In [114]:
# Display the (rows, columns) shape of the combined DataFrame as a quick size check.
df_combined.shape

(25000, 2)

## Streetview Images Collection

## Data set duplicate cleaning

## Image Integrity Check 

In [115]:
import os
from PIL import Image

# Path to your dataset
dataset_path = "/Users/mbouch17/Desktop/Personal_Data_Project/PinpointAI/jpeg_streetview_image"

# Supported image extensions
valid_extensions = (".jpg", ".jpeg", ".png")

# Filenames that failed verification, kept so later cells can exclude or remove them
corrupted_files = []

# Check every directory entry; sorted so the report order is the same on every run
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(dataset_path)):
    if not filename.lower().endswith(valid_extensions):
        print(f"Skipping non-image file: {filename}")
        continue

    try:
        # The context manager releases the file handle even when verify() raises.
        # NOTE: per the Pillow docs, verify() checks the file without decoding the
        # pixel data, so a truncated image may still pass this check.
        with Image.open(os.path.join(dataset_path, filename)) as img:
            img.verify()  # Verify image integrity
    except (IOError, SyntaxError):
        print(f"Corrupted image: {filename}")
        corrupted_files.append(filename)


Skipping non-image file: .DS_Store
Skipping non-image file: .ipynb_checkpoints


## Image Preprocessing and Transformation

In [116]:
import os
from torchvision import transforms
from PIL import Image
import torch
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Path to your dataset
dataset_path = "/Users/mbouch17/Desktop/Personal_Data_Project/PinpointAI/jpeg_streetview_image"

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Preprocess and keep all images as tensors
preprocessed_images = []  # To store processed tensors
image_labels = []  # Stays empty until a label source is wired in (see the loop below)

# Supported image extensions
valid_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif")

# Sorted so the tensor order is identical on every run and platform: os.listdir
# returns entries in arbitrary order, which would make the seeded split below
# (and any index-aligned labels) irreproducible.
for filename in sorted(os.listdir(dataset_path)):
    if not filename.lower().endswith(valid_extensions):
        continue  # Not an image file

    image_path = os.path.join(dataset_path, filename)
    try:
        # Context manager closes the file handle as soon as the pixels are converted
        with Image.open(image_path) as image:
            # convert("RGB") ensures the image has 3 color channels
            processed_image = transform(image.convert("RGB"))
    except Exception as e:
        # Best effort: report the bad file and keep going with the rest
        print(f"Error processing {filename}: {e}")
        continue

    preprocessed_images.append(processed_image)

    # Append corresponding label if available (optional, depends on your dataset structure)
    # Example: Extract label from filename or a separate label file
    # label = extract_label_from_filename(filename)  # Placeholder function
    # image_labels.append(label)

# Convert the list to a tensor batch for easier handling (optional)
if preprocessed_images:
    batched_images = torch.stack(preprocessed_images)
    print(f"Processed {len(preprocessed_images)} images successfully.")
else:
    # Make the empty case visible instead of silently leaving batched_images undefined
    print(f"No images could be processed from {dataset_path}.")


## Visualize Processed Image

In [None]:
# Example: Visualize image tensors or labels
import matplotlib.pyplot as plt

# Display a few processed images (unnormalized for visualization)
def show_image(tensor):
    """Plot one normalized image tensor with matplotlib.

    The tensor's dimensions are reordered from (C, H, W) to (H, W, C), the
    normalization is reversed using the per-channel std/mean constants below,
    values are clipped to [0, 1], and the result is drawn with the axes hidden.
    """
    channel_std = torch.tensor([0.229, 0.224, 0.225])
    channel_mean = torch.tensor([0.485, 0.456, 0.406])

    hwc = tensor.permute(1, 2, 0)
    restored = torch.clip(hwc * channel_std + channel_mean, 0, 1)

    plt.imshow(restored)
    plt.axis("off")
    plt.show()

show_image(preprocessed_images[0])  # Display first image


## Split Data
- Divide your dataset into training, validation, and test sets (the cell below uses 80% training, 10% validation, 10% test).
- Ensure the split is random but balanced across regions or coordinate ranges.

In [None]:
from sklearn.model_selection import train_test_split

# Example split (modify if you have labels).
# Stage 1 keeps 80% of the images for training; stage 2 halves the remaining 20%
# into validation and test, so the overall split is 80/10/10.
train_images, holdout_images = train_test_split(
    preprocessed_images,
    test_size=0.2,
    random_state=42,
)
val_images, test_images = train_test_split(
    holdout_images,
    test_size=0.5,
    random_state=42,
)

print(f"Training set size: {len(train_images)}")
print(f"Validation set size: {len(val_images)}")
print(f"Test set size: {len(test_images)}")


## Custom Dataset Class

In [None]:
from torch.utils.data import Dataset
from PIL import Image
import os

class ImageDataset(Dataset):
    """Dataset that loads images lazily from a list of file paths.

    Args:
        image_paths: Sequence of paths to image files.
        transform: Optional callable applied to each PIL image after it has
            been converted to RGB.
        labels: Optional sequence of targets, index-aligned with
            ``image_paths``. When given, items are ``(image, label)`` pairs;
            when omitted, items are the image alone (the original behavior).

    Raises:
        ValueError: If ``labels`` is given and its length differs from
            ``image_paths``.
    """

    def __init__(self, image_paths, transform=None, labels=None):
        if labels is not None and len(labels) != len(image_paths):
            raise ValueError(
                f"Got {len(image_paths)} image paths but {len(labels)} labels"
            )
        self.image_paths = image_paths
        self.transform = transform
        self.labels = labels

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert and transform the image at position ``idx``."""
        image_path = self.image_paths[idx]

        # Context manager closes the file handle once the pixels are loaded;
        # convert("RGB") returns an independent, fully loaded 3-channel image.
        with Image.open(image_path) as img:
            image = img.convert("RGB")

        # Explicit None check so a transform object that happens to be falsy
        # is still applied.
        if self.transform is not None:
            image = self.transform(image)

        if self.labels is None:
            return image
        return image, self.labels[idx]


## Data Augmentation and Transformation for Training and Validation

In [None]:
from torchvision import transforms

# Define transformations
# Values shared by both pipelines, named once so the two cannot drift apart
image_size = (224, 224)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random horizontal flip (augmentation), tensor, normalize
train_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.RandomHorizontalFlip(),  # Augmentation for training
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Validation/test pipeline: same steps without the random flip
val_test_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])


## Loading Dataset using ImageFolder

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader

import torch
from torch.utils.data import Subset

# Define your dataset (replace with your own data path)
IMAGE_ROOT = '/Users/mbouch17/Desktop/Personal_Data_Project/PinpointAI/jpeg_streetview_image'

# Two views of the same folder that differ only in their transform. Both index
# the same files, so an index selects the same image in either view.
# NOTE(review): ImageFolder expects one sub-directory per class under the root —
# confirm the folder is laid out that way.
train_view = datasets.ImageFolder(root=IMAGE_ROOT, transform=train_transform)
eval_view = datasets.ImageFolder(root=IMAGE_ROOT, transform=val_test_transform)

# Previously train/val/test each wrapped the whole folder, so validation and test
# metrics were computed on training images. Split the indices once (seeded, so the
# split is reproducible) into disjoint 80/10/10 subsets instead.
num_samples = len(train_view)
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(num_samples, generator=split_generator).tolist()

num_train = int(0.8 * num_samples)
num_val = int(0.1 * num_samples)

train_dataset = Subset(train_view, shuffled_indices[:num_train])
val_dataset = Subset(eval_view, shuffled_indices[num_train:num_train + num_val])
test_dataset = Subset(eval_view, shuffled_indices[num_train + num_val:])

# Define DataLoader for batching
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)


## DataLoader Setup

In [None]:
from torch.utils.data import DataLoader

batch_size = 64  # Adjust based on your GPU/CPU memory

# NOTE(review): this rebinds train_loader / val_loader / test_loader created in the
# previous cell (batch_size=32, num_workers=4) — confirm which configuration is intended.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation loaders share the same settings and keep the dataset order
eval_loader_options = dict(batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, **eval_loader_options)
test_loader = DataLoader(test_dataset, **eval_loader_options)


## Model Definition with Pre-trained ResNet18

In [None]:
import torch
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

# Size of the new output layer.
# NOTE(review): the targets fed to CrossEntropyLoss must be smaller than this value —
# confirm it matches the number of classes in the dataset.
num_classes = 100  # Example: Adjust based on your dataset

# Load pre-trained ResNet18 through the `weights=` enum
weights = ResNet18_Weights.IMAGENET1K_V1  # Equivalent to the old 'pretrained=True'
model = resnet18(weights=weights)

# Replace the final fully connected layer with a fresh one that keeps the same
# input width but produces `num_classes` outputs
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)


## Model Training Loop

In [None]:

# Cross-entropy objective with Adam updating every parameter of the model
criterion = nn.CrossEntropyLoss()  # For classification
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(10):  # Adjust the number of epochs
    model.train()
    train_loss = 0.0

    for images, labels in train_loader:
        # Inputs and targets must live on the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss for the per-epoch average printed below
        train_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {train_loss/len(train_loader)}")


## Freezing Pre-trained Layers

In [None]:
# Freeze every parameter of the network, then re-enable gradients for the
# classifier layer only
model.requires_grad_(False)
model.fc.requires_grad_(True)

# Fresh optimizer that only updates the classifier layer's parameters
optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-5)


## Training Loop with Gradients

In [None]:
def _run_training_epoch(model, loader, criterion, optimizer, device):
    """Train `model` for one pass over `loader` and return the summed batch loss."""
    model.train()
    summed_loss = 0.0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # Clear previous gradients

        # Forward pass
        loss = criterion(model(images), labels)
        summed_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    return summed_loss


criterion = nn.CrossEntropyLoss()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 10  # Adjust based on your needs

for epoch in range(num_epochs):
    # `optimizer` is whatever the preceding cell bound (it is not re-created here)
    train_loss = _run_training_epoch(model, train_loader, criterion, optimizer, device)
    print(f"Epoch {epoch + 1}, Loss: {train_loss / len(train_loader):.4f}")


## Validation Loop

In [None]:
# Evaluation mode, with gradient tracking disabled for the whole pass
model.eval()
val_loss = 0.0

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        batch_loss = criterion(model(images), labels)
        val_loss += batch_loss.item()

# Average of the per-batch losses
print(f"Validation Loss: {val_loss / len(val_loader):.4f}")

## Test the Model

In [None]:
# Running totals for the loss and accuracy over the test loader
test_loss, correct, total = 0.0, 0, 0

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Accumulate the batch loss
        test_loss += criterion(outputs, labels).item()

        # Predicted class = index of the largest output along dimension 1
        predicted = outputs.max(dim=1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f"Test Loss: {test_loss / len(test_loader):.4f}")
print(f"Test Accuracy: {100 * correct / total:.2f}%")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Function to denormalize the image
def denormalize_image(tensor, mean, std):
    """Reverse a per-channel normalization, returning ``tensor * std + mean``.

    Args:
        tensor: Image tensor with the channel dimension third from last,
            e.g. (C, H, W) or (N, C, H, W).
        mean: Per-channel means that were subtracted during normalization.
        std: Per-channel standard deviations that were divided out.

    Returns:
        The de-normalized tensor, on the same device as ``tensor``. The
        statistics are reshaped to (1, C, 1, 1), so a (C, H, W) input comes
        back with a leading batch dimension of size 1.
    """
    # Build the statistics on the input's device; CPU statistics cannot be
    # combined with a CUDA image. view(1, -1, 1, 1) accepts any channel count.
    mean = torch.tensor(mean, device=tensor.device).view(1, -1, 1, 1)
    std = torch.tensor(std, device=tensor.device).view(1, -1, 1, 1)
    return tensor * std + mean  # Reverse the normalization

# Show the first test image together with its true and predicted label
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # argmax over dimension 1 gives the predicted class index; this also avoids
        # assigning to `_`, which IPython uses for the last cell output
        predicted = outputs.argmax(dim=1)

        # Move the image to the CPU before de-normalizing: the statistics are plain
        # CPU values, and matplotlib needs a CPU array anyway
        first_image = images[0].cpu()
        denormalized_image = denormalize_image(first_image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

        # Drop only the leading batch dimension, then convert to HWC format
        denormalized_image = denormalized_image.squeeze(0).numpy().transpose(1, 2, 0)

        # Clip values to be in range [0, 1]
        denormalized_image = np.clip(denormalized_image, 0, 1)

        # Display the image
        plt.imshow(denormalized_image)
        plt.title(f"True: {labels[0].item()}, Predicted: {predicted[0].item()}")
        plt.show()
        break  # Display just one image


## Save & Deploy Model

In [None]:
# Save the model: only its state_dict (parameters and buffers) is written to disk
checkpoint_path = "fine_tuned_model.pth"
torch.save(model.state_dict(), checkpoint_path)


In [None]:
# Restore the saved weights into the existing model.
# map_location=device remaps the stored tensors onto the device chosen earlier, so a
# checkpoint written from a GPU run still loads on a CPU-only machine.
# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(
    torch.load("fine_tuned_model.pth", map_location=device, weights_only=True)
)
