# Input data

> Utilities to prepare input data for Clay.

In [1]:
# | default_exp data
# Reload imported modules automatically before each cell runs, so edits to the
# package source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
#| hide
import madewithclay

import inspect

# Print the source file each module was loaded from (presumably to confirm
# which checkout of the package this kernel is importing).
print(inspect.getsourcefile(madewithclay))
print(inspect.getsourcefile(madewithclay.data))
print(inspect.getsourcefile(madewithclay.embeddings))

/home/brunosan/code/Clay/documentation/madewithclay/__init__.py
/home/brunosan/code/Clay/documentation/madewithclay/data.py
/home/brunosan/code/Clay/documentation/madewithclay/embeddings.py


In [3]:
#| export
from pathlib import Path
from shapely.geometry import Polygon,Point
from typing import List, Union
import datetime


Before running Clay on your own input data, you first need to prepare that data. Depending on the Clay model version, the model expects specific files, bands, and data types. This package provides utilities to prepare the data for Clay.

In [4]:
#| export
class factory:
    """
    Prepare the input files Clay needs for a set of locations and times.

    On construction, every (location, time) pair is passed to
    `prepare_data_for_location_and_time`; the file names it returns are
    collected into the flat list `self.files`. The number of files is
    available both as `len(instance)` and as `instance.len`.
    """

    def __init__(self,
                 locations: Union[Point, Polygon, List[Union[Point, Polygon]]], # point or polygon, or a list/tuple of them, to prepare data for.
                 times: Union[datetime.datetime, List[datetime.datetime]], # time, or a list/tuple of times, to prepare data for.
                 model_version: str, # model version for which to prepare data.
                 local_folder: Union[Path, str]): # local folder to store data.
        """
        Store the request and immediately prepare the data for it.
        """
        self.locations = self._as_list(locations)
        self.times = self._as_list(times)
        self.model_version = model_version
        # Accept plain strings too, so the attribute is always a `Path`.
        self.local_folder = Path(local_folder)

        self.prepare_data()

    @staticmethod
    def _as_list(value):
        """
        Return `value` as a new list: sequences (list/tuple) are copied,
        any other object is treated as a single item and wrapped.
        """
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    def __len__(self):
        """
        Number of prepared files (same value as `self.len`).
        """
        return len(self.files)

    def prepare_data(self):
        """
        Prepare the files for every (location, time) combination.

        Resets `self.files`, fills it with the files returned for each pair
        (locations in the outer loop, times in the inner loop) and updates
        `self.len` with the total count.
        """
        self.files = []
        for location in self.locations:
            for time in self.times:
                self.files.extend(self.prepare_data_for_location_and_time(location, time))
        # Kept for backward compatibility; `len(self)` is the idiomatic form.
        self.len = len(self.files)

    def prepare_data_for_location_and_time(self,
                                           location: Union[Point, Polygon], # location to prepare data.
                                           time: datetime.datetime): # time to prepare data.
        """
        Return the list of files prepared for one location at one time.

        Not implemented yet: it prints a notice and returns three placeholder
        file names, ignoring both arguments.
        """
        #TODO
        # Search assets on Planetary Computer
        # Download assets
        # save into local_folder

        # make list of *.tif file on local_folder
        print("Method prepare_data_for_location_and_time not implemented yet.")
        files = ['file1.tif', 'file2.tif', 'file3.tif']
        return files

    def rgb(self,
            at_most: int = 10): # at most how many random images to plot.
        """
        Plot RGB images.

        Not implemented yet: it only prints a notice, and `at_most` is
        currently unused.
        """
        #TODO: pick at most `at_most` random files from self.files and plot them
        print("Method not implemented yet.")
        


The unit of data is a section of an MGRS tile of size `512x512`, called a "patch". When given a point, the data factory creates data only for the patch that contains it.

In [5]:
# Request data for a single point: Copenhagen, given as Point(x, y).
copenhagen = Point(12.568337, 55.676098)
time = datetime.datetime(2020, 1, 1)
model_version = "v0"
local_folder = Path("tmp/data")

# Building the factory prepares the data straight away.
input_data = factory(
    locations=copenhagen,
    times=time,
    model_version=model_version,
    local_folder=local_folder,
)
print(f"{input_data.len} files.")



Method prepare_data_for_location_and_time not implemented yet.
3 files.


You can visualize the data with the factory's `rgb` method.

In [6]:
# `rgb` is still a stub: for now it only prints a "not implemented" notice.
input_data.rgb()

Method not implemented yet.


In [7]:
#| hide
# Export the notebook cells marked `#| export` to the library's Python modules.
import nbdev
nbdev.nbdev_export()