**Taking Data From Feature Store**

In [None]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [None]:
from sagemaker.feature_store.feature_group import FeatureGroup

In [None]:
import boto3

# NOTE: the 'sagemaker-runtime' service only exposes endpoint-invocation
# operations. The client is kept so the module-level name stays available,
# but the query helper below builds the clients it actually needs.
sagemaker_client = boto3.client('sagemaker-runtime')

def get_feature_store_data(feature_group_name, output_location=None, poll_interval=1.0):
    """Read every record of a feature group's offline store through Athena.

    The feature group is assumed to exist already, with data ingested and an
    offline store registered in the Glue data catalog.

    Args:
        feature_group_name: Name of the SageMaker feature group to read.
        output_location: Optional ``s3://`` URI where Athena writes the query
            results. When omitted, the Athena workgroup must define a result
            location itself.
        poll_interval: Seconds to sleep between query status checks.

    Returns:
        The raw Athena result rows, each shaped like
        ``{'Data': [{'VarCharValue': ...}, ...]}``. For a SELECT statement
        Athena places the column names in the first row.

    Raises:
        ValueError: If the feature group has no offline store catalog entry.
        RuntimeError: If the Athena query fails or is cancelled.
    """
    import time  # local import: the notebook's import cells do not provide `time`

    # The Glue table backing the offline store has a generated name, so it is
    # looked up instead of using the feature group name as the SQL identifier.
    featurestore_client = boto3.client('sagemaker')
    description = featurestore_client.describe_feature_group(
        FeatureGroupName=feature_group_name
    )
    catalog = (description.get('OfflineStoreConfig') or {}).get('DataCatalogConfig')
    if not catalog:
        raise ValueError(
            f"Feature group {feature_group_name!r} has no offline store data catalog entry"
        )

    athena_client = boto3.client('athena')
    start_arguments = {
        'QueryString': f'SELECT * FROM "{catalog["Database"]}"."{catalog["TableName"]}"',
        'QueryExecutionContext': {
            'Catalog': catalog['Catalog'],
            'Database': catalog['Database'],
        },
    }
    if output_location is not None:
        start_arguments['ResultConfiguration'] = {'OutputLocation': output_location}
    query_execution_id = athena_client.start_query_execution(**start_arguments)['QueryExecutionId']

    # Wait for the query to complete; results are only readable once the
    # execution has reached the SUCCEEDED state.
    while True:
        execution = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
        status = execution['QueryExecution']['Status']
        if status['State'] == 'SUCCEEDED':
            break
        if status['State'] in ('FAILED', 'CANCELLED'):
            reason = status.get('StateChangeReason', 'no reason reported')
            raise RuntimeError(
                f"Athena query {query_execution_id} ended in state {status['State']}: {reason}"
            )
        time.sleep(poll_interval)

    # A single get_query_results call returns one page only, so walk all pages.
    rows = []
    paginator = athena_client.get_paginator('get_query_results')
    for page in paginator.paginate(QueryExecutionId=query_execution_id):
        rows.extend(page['ResultSet']['Rows'])

    return rows

# Feature group to read; the query helper assumes it already exists with data ingested.
feature_group_name = 'image-feature-group'
# Executes the query when this cell runs; `rows` feeds the metadata-building cell.
rows = get_feature_store_data(feature_group_name)

In [None]:
import os
import requests

s3_client = boto3.client('s3')

def download_images(metadata, download_dir='images'):
    """Download every record's S3 image and store its local path on the record.

    Each file is written to ``<download_dir>/<bucket>/<key path>``. Mirroring
    the key keeps objects that share a filename under different prefixes from
    overwriting one another, which would leave records pointing at the wrong
    image.

    Args:
        metadata: Iterable of dicts, each with an ``'image_location'`` entry
            holding an ``s3://bucket/key`` location.
        download_dir: Local directory that receives the files; created if
            missing.

    Returns:
        The same ``metadata`` object; every record gains a ``'local_path'``
        entry (records are updated in place).

    Raises:
        ValueError: If an image location has no bucket or no object key.
    """
    os.makedirs(download_dir, exist_ok=True)

    for record in metadata:
        image_location = record['image_location']
        if image_location.startswith('s3://'):
            location = image_location[len('s3://'):]
        else:
            location = image_location
        bucket, _, key = location.partition('/')

        # Drop empty and relative segments so the key cannot point outside
        # of download_dir once it is turned into a filesystem path.
        key_parts = [part for part in key.split('/') if part not in ('', '.', '..')]
        if not bucket or not key_parts:
            raise ValueError(f"Cannot parse S3 bucket and key from {image_location!r}")

        local_path = os.path.join(download_dir, bucket, *key_parts)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        s3_client.download_file(bucket, key, local_path)

        record['local_path'] = local_path  # Add the local path to the record

    return metadata

# Each Athena result row is a dict shaped like {'Data': [{'VarCharValue': ...}, ...]},
# so the cells are reached through row['Data'] (indexing the row with 0 raises
# KeyError). For a SELECT statement the first row carries the column names and
# is skipped here.
# NOTE(review): the positional indices assume the table columns are ordered
# image_id, image_location, label, image_type, event_time; confirm against the
# feature group definition.
metadata = [
    {
        'image_id': cells[0]['VarCharValue'],
        'image_location': cells[1]['VarCharValue'],
        'label': int(cells[2]['VarCharValue']),
        'image_type': cells[3]['VarCharValue'],
        'event_time': cells[4]['VarCharValue'],
    }
    for cells in (row['Data'] for row in rows[1:])
]

# Fetch the image files; every record gains a 'local_path' entry.
metadata = download_images(metadata)

In [None]:
from sklearn.model_selection import train_test_split

# Two-stage stratified split: first hold out 20% of all records for testing,
# then carve 25% of the remainder off for validation (roughly 60/20/20 overall).
# Both stages stratify on the record label and use the same fixed seed.
train_metadata, test_metadata = train_test_split(
    metadata,
    test_size=0.2,
    stratify=[record['label'] for record in metadata],
    random_state=42,
)
train_metadata, val_metadata = train_test_split(
    train_metadata,
    test_size=0.25,
    stratify=[record['label'] for record in train_metadata],
    random_state=42,
)

# Report the size of each split.
print(f"Training samples: {len(train_metadata)}")
print(f"Validation samples: {len(val_metadata)}")
print(f"Test samples: {len(test_metadata)}")

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class FireDataset(Dataset):
    """Dataset that serves (image, label) pairs from downloaded image records.

    Args:
        metadata: Indexable collection of dicts; each record must provide a
            ``'local_path'`` (image file on disk) and a ``'label'`` entry.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.
    """

    def __init__(self, metadata, transform=None):
        self.metadata = metadata
        self.transform = transform

    def __len__(self):
        """Return the number of records."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Load the image of record ``idx`` and return ``(image, label)``."""
        record = self.metadata[idx]

        # Image.open keeps the underlying file open; the context manager
        # closes it as soon as convert() has produced the RGB copy, so file
        # handles do not pile up while iterating over the dataset.
        with Image.open(record['local_path']) as source:
            image = source.convert('RGB')
        label = record['label']

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
# Preprocessing pipelines for the three splits. Training data additionally gets
# a random horizontal flip; validation and test data share one deterministic
# pipeline.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics; confirm they suit the backbone being trained.
_target_size = (224, 224)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_batch_size = 32

train_transform = transforms.Compose(
    [
        transforms.Resize(_target_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=_channel_mean, std=_channel_std),
    ]
)

val_test_transform = transforms.Compose(
    [
        transforms.Resize(_target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=_channel_mean, std=_channel_std),
    ]
)

# One dataset per split, each paired with the matching pipeline.
train_dataset = FireDataset(train_metadata, transform=train_transform)
val_dataset = FireDataset(val_metadata, transform=val_test_transform)
test_dataset = FireDataset(test_metadata, transform=val_test_transform)

# Only the training loader shuffles; evaluation loaders keep record order.
train_loader = DataLoader(train_dataset, batch_size=_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=_batch_size, shuffle=False)

**Experiments**

In [None]:
# Imported here because the notebook's import cells do not provide
# `strftime`/`gmtime`; without it this cell raises NameError on a fresh kernel.
from time import gmtime, strftime

# The suffix is the current UTC day-of-month and time (DD-HH-MM-SS).
experiment_name = f"first-experiment-resnet-{strftime('%d-%H-%M-%S', gmtime())}"

**Taking Data From Feature Store**

In [None]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [None]:
from sagemaker.feature_store.feature_group import FeatureGroup

In [None]:
import boto3

# NOTE: the 'sagemaker-runtime' service only exposes endpoint-invocation
# operations. The client is kept so the module-level name stays available,
# but the query helper below builds the clients it actually needs.
sagemaker_client = boto3.client('sagemaker-runtime')

def get_feature_store_data(feature_group_name, output_location=None, poll_interval=1.0):
    """Read every record of a feature group's offline store through Athena.

    The feature group is assumed to exist already, with data ingested and an
    offline store registered in the Glue data catalog.

    Args:
        feature_group_name: Name of the SageMaker feature group to read.
        output_location: Optional ``s3://`` URI where Athena writes the query
            results. When omitted, the Athena workgroup must define a result
            location itself.
        poll_interval: Seconds to sleep between query status checks.

    Returns:
        The raw Athena result rows, each shaped like
        ``{'Data': [{'VarCharValue': ...}, ...]}``. For a SELECT statement
        Athena places the column names in the first row.

    Raises:
        ValueError: If the feature group has no offline store catalog entry.
        RuntimeError: If the Athena query fails or is cancelled.
    """
    import time  # local import: the notebook's import cells do not provide `time`

    # The Glue table backing the offline store has a generated name, so it is
    # looked up instead of using the feature group name as the SQL identifier.
    featurestore_client = boto3.client('sagemaker')
    description = featurestore_client.describe_feature_group(
        FeatureGroupName=feature_group_name
    )
    catalog = (description.get('OfflineStoreConfig') or {}).get('DataCatalogConfig')
    if not catalog:
        raise ValueError(
            f"Feature group {feature_group_name!r} has no offline store data catalog entry"
        )

    athena_client = boto3.client('athena')
    start_arguments = {
        'QueryString': f'SELECT * FROM "{catalog["Database"]}"."{catalog["TableName"]}"',
        'QueryExecutionContext': {
            'Catalog': catalog['Catalog'],
            'Database': catalog['Database'],
        },
    }
    if output_location is not None:
        start_arguments['ResultConfiguration'] = {'OutputLocation': output_location}
    query_execution_id = athena_client.start_query_execution(**start_arguments)['QueryExecutionId']

    # Wait for the query to complete; results are only readable once the
    # execution has reached the SUCCEEDED state.
    while True:
        execution = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
        status = execution['QueryExecution']['Status']
        if status['State'] == 'SUCCEEDED':
            break
        if status['State'] in ('FAILED', 'CANCELLED'):
            reason = status.get('StateChangeReason', 'no reason reported')
            raise RuntimeError(
                f"Athena query {query_execution_id} ended in state {status['State']}: {reason}"
            )
        time.sleep(poll_interval)

    # A single get_query_results call returns one page only, so walk all pages.
    rows = []
    paginator = athena_client.get_paginator('get_query_results')
    for page in paginator.paginate(QueryExecutionId=query_execution_id):
        rows.extend(page['ResultSet']['Rows'])

    return rows

# Feature group to read; the query helper assumes it already exists with data ingested.
feature_group_name = 'image-feature-group'
# Executes the query when this cell runs; `rows` feeds the metadata-building cell.
rows = get_feature_store_data(feature_group_name)

In [None]:
import os
import requests

s3_client = boto3.client('s3')

def download_images(metadata, download_dir='images'):
    """Download every record's S3 image and store its local path on the record.

    Each file is written to ``<download_dir>/<bucket>/<key path>``. Mirroring
    the key keeps objects that share a filename under different prefixes from
    overwriting one another, which would leave records pointing at the wrong
    image.

    Args:
        metadata: Iterable of dicts, each with an ``'image_location'`` entry
            holding an ``s3://bucket/key`` location.
        download_dir: Local directory that receives the files; created if
            missing.

    Returns:
        The same ``metadata`` object; every record gains a ``'local_path'``
        entry (records are updated in place).

    Raises:
        ValueError: If an image location has no bucket or no object key.
    """
    os.makedirs(download_dir, exist_ok=True)

    for record in metadata:
        image_location = record['image_location']
        if image_location.startswith('s3://'):
            location = image_location[len('s3://'):]
        else:
            location = image_location
        bucket, _, key = location.partition('/')

        # Drop empty and relative segments so the key cannot point outside
        # of download_dir once it is turned into a filesystem path.
        key_parts = [part for part in key.split('/') if part not in ('', '.', '..')]
        if not bucket or not key_parts:
            raise ValueError(f"Cannot parse S3 bucket and key from {image_location!r}")

        local_path = os.path.join(download_dir, bucket, *key_parts)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        s3_client.download_file(bucket, key, local_path)

        record['local_path'] = local_path  # Add the local path to the record

    return metadata

# Each Athena result row is a dict shaped like {'Data': [{'VarCharValue': ...}, ...]},
# so the cells are reached through row['Data'] (indexing the row with 0 raises
# KeyError). For a SELECT statement the first row carries the column names and
# is skipped here.
# NOTE(review): the positional indices assume the table columns are ordered
# image_id, image_location, label, image_type, event_time; confirm against the
# feature group definition.
metadata = [
    {
        'image_id': cells[0]['VarCharValue'],
        'image_location': cells[1]['VarCharValue'],
        'label': int(cells[2]['VarCharValue']),
        'image_type': cells[3]['VarCharValue'],
        'event_time': cells[4]['VarCharValue'],
    }
    for cells in (row['Data'] for row in rows[1:])
]

# Fetch the image files; every record gains a 'local_path' entry.
metadata = download_images(metadata)

In [None]:
from sklearn.model_selection import train_test_split

# Two-stage stratified split: first hold out 20% of all records for testing,
# then carve 25% of the remainder off for validation (roughly 60/20/20 overall).
# Both stages stratify on the record label and use the same fixed seed.
train_metadata, test_metadata = train_test_split(
    metadata,
    test_size=0.2,
    stratify=[record['label'] for record in metadata],
    random_state=42,
)
train_metadata, val_metadata = train_test_split(
    train_metadata,
    test_size=0.25,
    stratify=[record['label'] for record in train_metadata],
    random_state=42,
)

# Report the size of each split.
print(f"Training samples: {len(train_metadata)}")
print(f"Validation samples: {len(val_metadata)}")
print(f"Test samples: {len(test_metadata)}")

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class FireDataset(Dataset):
    """Dataset that serves (image, label) pairs from downloaded image records.

    Args:
        metadata: Indexable collection of dicts; each record must provide a
            ``'local_path'`` (image file on disk) and a ``'label'`` entry.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.
    """

    def __init__(self, metadata, transform=None):
        self.metadata = metadata
        self.transform = transform

    def __len__(self):
        """Return the number of records."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Load the image of record ``idx`` and return ``(image, label)``."""
        record = self.metadata[idx]

        # Image.open keeps the underlying file open; the context manager
        # closes it as soon as convert() has produced the RGB copy, so file
        # handles do not pile up while iterating over the dataset.
        with Image.open(record['local_path']) as source:
            image = source.convert('RGB')
        label = record['label']

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
# Preprocessing pipelines for the three splits. Training data additionally gets
# a random horizontal flip; validation and test data share one deterministic
# pipeline.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics; confirm they suit the backbone being trained.
_target_size = (224, 224)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_batch_size = 32

train_transform = transforms.Compose(
    [
        transforms.Resize(_target_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=_channel_mean, std=_channel_std),
    ]
)

val_test_transform = transforms.Compose(
    [
        transforms.Resize(_target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=_channel_mean, std=_channel_std),
    ]
)

# One dataset per split, each paired with the matching pipeline.
train_dataset = FireDataset(train_metadata, transform=train_transform)
val_dataset = FireDataset(val_metadata, transform=val_test_transform)
test_dataset = FireDataset(test_metadata, transform=val_test_transform)

# Only the training loader shuffles; evaluation loaders keep record order.
train_loader = DataLoader(train_dataset, batch_size=_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=_batch_size, shuffle=False)

**Experiments**

In [None]:
# Imported here because the notebook's import cells do not provide
# `strftime`/`gmtime`; without it this cell raises NameError on a fresh kernel.
from time import gmtime, strftime

# The suffix is the current UTC day-of-month and time (DD-HH-MM-SS).
experiment_name = f"first-experiment-resnet-{strftime('%d-%H-%M-%S', gmtime())}"