In [2]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from IPython.display import Image, display
import matplotlib.pyplot as plt
import pandas as pd
#import seaborn as sns
import cv2
import numpy as np
import torchdata.datapipes as dp
#import boto3
#import skimage
import PIL.Image
import json
import fsspec
from src.globals import DATA_DIR #, LABELS_TO_INDS
from pathlib import Path

## Imported functions

In [3]:
def download_from_s3(bucket: str, remote_path: str, local_path: str, s3_client=None):
    if not s3_client:
        s3_client = boto3.client("s3")
    s3_client.download_file(bucket, remote_path, local_path)


def split_bucket_from_path(x: str):
    return "".join(x.split("/")[2:3]), "/".join(x.split("/")[3:])


def combine_bands(arrays: list[np.array], max_res=(120, 120)):
    """Combines bands into one array, upscaling each band to max_res.

    Args:
        arrays (list): list of bands
        max_res (tuple, optional): Max height & width to resize bands to. Defaults to (120, 120).

    Returns:
        np.array: Concatenated & resized bands.
    """
    result = np.zeros(shape=(*max_res, len(arrays)))
    for i, array in enumerate(arrays):
        assert array.shape[0] <= max_res[0]
        assert array.shape[1] <= max_res[1]
        result[:, :, i] = cv2.resize(array, dsize=max_res)
    return result


def get_first_n_pcs(img: np.array, num_components: int):
    """Perform PCA on a single image and return principle components which make up the most variance.

    Args:
        img (np.array): Original image of shape (h, w, num_bands).
        num_components (int): Desired number of components to be returned.

    Returns:
        np.array: Components with shape (h, w, num_components).
    """
    # Convert 2d bands into 1-d arrays
    bands_vectorized = np.zeros(shape=(img.shape[0] * img.shape[1], img.shape[2]))
    for i in range(img.shape[-1]):
        flattened_band = img[:, :, i].flatten()
        flattened_band_standard = (
            flattened_band - flattened_band.mean()
        ) / flattened_band.std()
        bands_vectorized[:, i] = flattened_band_standard

    cov = np.cov(bands_vectorized.transpose())
    eig_val, eig_vec = np.linalg.eig(cov)

    # Ordering Eigen values and vectors
    order = eig_val.argsort()[::-1]
    eig_val = eig_val[order]
    eig_vec = eig_vec[:, order]

    # Projecting data on Eigen vector directions resulting in Principal Components
    pcs = np.matmul(bands_vectorized, eig_vec)

    # Rearranging 1-d arrays to 2-d arrays of image size
    PC_2d = np.zeros((num_components, img.shape[0], img.shape[1]))
    for i in range(num_components):
        PC_2d[i, :, :] = pcs[:, i].reshape(-1, img.shape[1])

    # normalizing between 0 to 255
    PC_2d_Norm = np.zeros((num_components, img.shape[0], img.shape[1]))
    for i in range(num_components):
        PC_2d_Norm[i, :, :] = cv2.normalize(
            PC_2d[i, :, :], np.zeros(img.shape), 0, 255, cv2.NORM_MINMAX
        )

    return PC_2d_Norm[: num_components + 1, :, :]

TypeError: 'type' object is not subscriptable