# Ingest raster data into database
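This notebook shows a technique for ingesting gridded (raster) data into a database: a two-band raster is flattened into a table of pixel-centre points, the pixels are counted per combination of band values, and the resulting table is written to a CSV file and to a SQLite database.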

In [None]:
# Utilities
import geopandas as gpd
import pandas as pd
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio as rio

In [None]:
### read raster
raster_2band = "path/to/raster/LU001C/LU001C.tiff"

with rio.Env(), rio.open(raster_2band) as src:
    crs = src.crs

    # Coordinates of the first and last pixel (pixel centres), rounded to
    # 9 decimals; these are the end points of the coordinate axes below.
    xmin, ymax = np.round(src.xy(0.00, 0.00), 9)
    xmax, ymin = np.round(src.xy(src.height - 1, src.width - 1), 9)

    # 1D coordinate axes: one value per column (x) and per row (y).
    # y runs max -> min so the coordinates go top -> bottom like the rows.
    x = np.linspace(xmin, xmax, num=src.width)
    y = np.linspace(ymax, ymin, num=src.height)

    # 2D coordinate grids and the two raster bands
    xs, ys = np.meshgrid(x, y)
    b1 = src.read(1)
    b2 = src.read(2)

    # Apply the NoData mask of band 1 to the coordinates and to both bands.
    # NOTE(review): band 2 is filtered with band 1's mask only - confirm
    # that both bands share the same NoData footprint.
    mask = src.read_masks(1) > 0
    xs, ys, b1, b2 = (layer[mask] for layer in (xs, ys, b1, b2))

# One table column per coordinate axis / band, one row per unmasked pixel
data = {
    column: pd.Series(values.ravel())
    for column, values in (
        ("X", xs),
        ("Y", ys),
        ("city_code", b1),
        ("urban_atlas_2018", b2),
    )
}

df = pd.DataFrame(data)
geometry = gpd.points_from_xy(df["X"], df["Y"])
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)

print(gdf.head())

print("------------")

### combine different dimensions:
# number of pixels for every (city_code, urban_atlas_2018) combination
combine_table_cube_urban = (
    df.groupby(["city_code", "urban_atlas_2018"])
    .size()
    .reset_index(name="count")
)

### write data to table
ouptut_table = "path/to/tables/urban_cube_v2.csv"
combine_table_cube_urban.to_csv(ouptut_table, index=False)

In [None]:
# Save to sqlite database
import sqlite3

# Write the aggregated table to the "urban_cube" table of the SQLite file,
# replacing the table if it already exists.
conn = sqlite3.connect("path/to/urban_cube.sqlite")
try:
    combine_table_cube_urban.to_sql(
        "urban_cube", conn, if_exists="replace", index=False
    )
finally:
    # Always release the connection (and its handle on the database file),
    # even when the write fails, so that re-running the cell starts clean.
    conn.close()