In [89]:
import time
import os
import json
import boto3
import numpy as np
import sagemaker
import requests
import torch
import tqdm

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torchvision import models
from torchvision import transforms
from collections import defaultdict
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sagemaker.feature_store.feature_group import FeatureGroup


In [90]:
boto_session = boto3.Session()
region = boto_session.region_name

sm_session = sagemaker.Session()
sm_client = boto_session.client("sagemaker")
sm_role = sagemaker.get_execution_role()

s3_client = boto3.client('s3')

# Define your feature group name and region
feature_group_name = 'fire-image-feature-group'

# Athena client
athena_client = boto3.client('athena', region_name=region)

**Taking Data from Feature Store**

In [91]:
# Initialize the feature group
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=sm_session)

In [92]:
# Query
query = """SELECT *
FROM "AwsDataCatalog"."sagemaker_featurestore"."fire_image_feature_group_1718694943";
"""

# Run query
response = athena_client.start_query_execution(
    QueryString=query,
    QueryExecutionContext={
        'Database': 'sagemaker_featurestore'
    },
    ResultConfiguration={
        'OutputLocation': 's3://wildfires/feature-store-output/'
    }
)

# Get query execution ID
query_execution_id = response['QueryExecutionId']

# Wait for the query to complete
status = 'RUNNING'
while status != 'SUCCEEDED':
    response = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
    status = response['QueryExecution']['Status']['State']

# Get the results
response = athena_client.get_query_results(QueryExecutionId=query_execution_id)

# Process the results into a DataFrame
rows = [row['Data'] for row in response['ResultSet']['Rows'][1:]]
columns = [col['VarCharValue'] for col in response['ResultSet']['Rows'][0]['Data']]

rows[:5]

[[{'VarCharValue': 'bf2519ec-cf14-4fdd-a4ba-78c1d02e960d'},
  {'VarCharValue': 's3://wildfires/fire_images/fire.400.png'},
  {'VarCharValue': '1'},
  {'VarCharValue': 'png'},
  {'VarCharValue': '2024-06-18T07:28:48.591943Z'},
  {'VarCharValue': '2024-06-18 07:33:49.904'},
  {'VarCharValue': '2024-06-18 07:28:53.000'},
  {'VarCharValue': 'false'}],
 [{'VarCharValue': '91cbb182-380d-42b7-9330-75a340a0695c'},
  {'VarCharValue': 's3://wildfires/fire_images/fire.401.png'},
  {'VarCharValue': '1'},
  {'VarCharValue': 'png'},
  {'VarCharValue': '2024-06-18T07:28:48.591950Z'},
  {'VarCharValue': '2024-06-18 07:33:57.493'},
  {'VarCharValue': '2024-06-18 07:28:53.000'},
  {'VarCharValue': 'false'}],
 [{'VarCharValue': '61f7b2dc-a9b5-48f5-b537-b74917e8f81d'},
  {'VarCharValue': 's3://wildfires/fire_images/fire.706.png'},
  {'VarCharValue': '1'},
  {'VarCharValue': 'png'},
  {'VarCharValue': '2024-06-18T07:28:48.594656Z'},
  {'VarCharValue': '2024-06-18 07:33:57.493'},
  {'VarCharValue': '2024-06

In [93]:
def download_images(metadata, download_dir='images'):
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    for record in metadata:
        image_location = record['image_location']
        bucket, key = image_location.replace('s3://', '').split('/', 1)
        local_path = os.path.join(download_dir, os.path.basename(key))

        s3_client.download_file(bucket, key, local_path)

        record['local_path'] = local_path  # Add the local path to the record

    return metadata


metadata = [
    {
        'image_id': row[0]['VarCharValue'],
        'image_location': row[1]['VarCharValue'],
        'label': int(row[2]['VarCharValue']),
        'image_type': row[3]['VarCharValue'],
        'event_time': row[4]['VarCharValue'],
    } for row in rows
]

metadata = download_images(metadata)
print("Finished!")

Finished!


In [94]:
# Split the metadata into train, validation, and test sets
train_metadata, test_metadata = train_test_split(metadata, test_size=0.2, stratify=[m['label'] for m in metadata], random_state=42)
train_metadata, val_metadata = train_test_split(train_metadata, test_size=0.25, stratify=[m['label'] for m in train_metadata], random_state=42)

print(f"Training samples: {len(train_metadata)}")
print(f"Validation samples: {len(val_metadata)}")
print(f"Test samples: {len(test_metadata)}")

Training samples: 599
Validation samples: 200
Test samples: 200


In [95]:
class FireDataset(Dataset):
    def __init__(self, metadata, transform=None):
        self.metadata = metadata
        self.transform = transform

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        img_path = self.metadata[idx]['local_path']
        image = Image.open(img_path).convert('RGB')
        label = self.metadata[idx]['label']

        if self.transform:
            image = self.transform(image)

        return image, label


In [96]:
# Define transformations for training and validation/test datasets
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create datasets
train_dataset = FireDataset(train_metadata, transform=train_transform)
val_dataset = FireDataset(val_metadata, transform=val_test_transform)
test_dataset = FireDataset(test_metadata, transform=val_test_transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


**Training**

In [97]:
model = models.resnet18(weights='ResNet18_Weights.DEFAULT')

In [98]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [99]:
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

In [100]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [101]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [102]:
def train(model, train_loader, optimizer, loss_function, epoch, device):
    model = model.to(device)
    loss_function = loss_function.to(device)
    model.train()
    train_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_function(output, target)
        train_loss += loss.sum().item()
        loss.backward()
        optimizer.step()
        # state = {"model_state": model.state_dict(),
        #      "optimizer_state": optimizer.state_dict(),
        #      "epoch": epoch}
        # torch.save(state, f"{checkpoint_dir}/epoch_{epoch}.pth")
        if batch_idx % 200 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    train_loss /= len(train_loader.dataset)
    # writer.add_scalar("train-loss", train_loss, global_step=epoch)


def test(model, test_loader, loss_function, epoch, device):
    model = model.to(device)
    loss_function = loss_function.to(device)
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            test_loss += loss_function(output, target).sum().item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    # writer.add_scalar("test-loss", test_loss, global_step=epoch)
    # writer.add_scalar("test-accuracy", 100. * correct / len(test_loader.dataset), global_step=epoch)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [103]:
def train_test(model, optimizer, train_loader, test_loader, device, n_epochs=1, cv=5):
    for i in range(cv):
        for epoch in range(0, n_epochs):
            train(model, train_loader, optimizer, criterion, epoch, device)
            test(model, test_loader, criterion, epoch, device)

In [104]:
train_test(model=model,
           optimizer=optimizer,
           train_loader=train_loader,
           test_loader=test_loader,
           device=device,
           n_epochs=5
           )


Test set: Average loss: 37.0479, Accuracy: 151/200 (76%)


Test set: Average loss: 0.0160, Accuracy: 143/200 (72%)


Test set: Average loss: 0.0136, Accuracy: 160/200 (80%)


Test set: Average loss: 0.0126, Accuracy: 163/200 (82%)


Test set: Average loss: 0.0112, Accuracy: 169/200 (84%)



**Upload Model Artifact to S3**

In [105]:
model_filename = "model_resnet18"
model_folder = "models/model_resnet18"

# %mkdir -p models

torch.save(model.state_dict(), f'{model_folder}/{model_filename}.pth')

In [None]:
bucket = 'wildfires'
prefix = 'sagemaker/fire-image-classification'

# Upload the training script to S3
s3 = boto3.client('s3')
s3.upload_file('/home/sagemaker-user/Training/models/model_resnet18/code/inference.py', bucket, f'{prefix}/inference.py')


In [None]:
%cd models
%cd model_resnet18
!tar -czvf model_resnet18.tar.gz model_resnet18.pth code/

In [None]:
s3 = boto3.client('s3')

BUCKET_NAME = 'wildfires'

s3.upload_file(
    f'{model_filename}.tar.gz',
    BUCKET_NAME,
    f"models/{model_filename}.tar.gz",
)

**Continue with Deployment/deployment.ipynb or ModelRegistry/model_registry.ipynb**