# Utilities

## Images extractor

In [None]:
import tarfile
from pathlib import Path

In [None]:
def extract_images(in_dir, out_dir):
    """Unpack each TAR archive into its own sub-folder of ``out_dir``.

    Every archive is extracted to ``out_dir / <name of the archive's parent
    directory>``, so archives that share a file name (e.g. ``response.tar``)
    do not overwrite each other as long as their parent folders differ.

    Args:
        in_dir: Iterable of ``pathlib.Path`` objects, one per TAR archive.
            Despite the name this is a collection of archive paths, not a
            directory.
        out_dir: ``pathlib.Path`` of the directory that receives one
            sub-folder per archive.

    Returns:
        list: The output folder of every processed archive, in iteration
        order (empty when ``in_dir`` yields nothing).
    """
    # Prefer the 'data' extraction filter when this interpreter provides it:
    # it refuses members that would escape the target folder and avoids the
    # DeprecationWarning newer Python versions emit for unfiltered extraction.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    out_folders = []
    for tar_file in in_dir:
        print(f'Extracting {tar_file.parent.name}...')
        out_folder = out_dir / tar_file.parent.name
        with tarfile.open(tar_file, 'r') as archive:
            archive.extractall(out_folder, **extract_kwargs)
        out_folders.append(out_folder)
    return out_folders

In [None]:
root_dir = Path('/mnt/d/Projects/Science/Programming/lulc-utility')

# Directory containing the cached TAR archives.
# parents=True so the call does not fail when intermediate folders are missing.
images_dir = root_dir / 'cache/imagery/sentinel_hub/imagery_v1'
images_dir.mkdir(parents=True, exist_ok=True)

# Collect every response.tar below the cache directory; sorted so the
# processing order is reproducible between runs.
tars_paths = sorted(images_dir.glob('**/response.tar'))

# Directory that receives the extracted archive contents
out_dir = root_dir / 'data/extracted_images'
out_dir.mkdir(parents=True, exist_ok=True)

# Unpack each archive into out_dir / <name of the archive's parent folder>;
# s2_paths holds whatever extract_images returns.
s2_paths = extract_images(tars_paths, out_dir)

Extracting 07cad76ece79666f2266caab083df113...
Extracting 1b66267bb7d4bffc8bb86d91283fc891...
Extracting 20eb07faec1640440d1e1279e87f252e...
Extracting 25e181e04ce7d5c2bb8fac881b4949b6...
Extracting 261bd6dcdfd963215dbc44d8b4ffae3f...
Extracting 292c69fe3b20d52dd3b179fbdd5ab883...
Extracting 344fc49564640817ec8cd6c340656235...
Extracting 38e0c5281c4b3658915596930fdac5a0...
Extracting 515419aae6738159b01333e4a0e8839e...
Extracting 552fc2b4756b67263ff0cc0ee6c62499...
Extracting 69b5c650431733af58141d1bbffcdec7...
Extracting 842ace24c09d18c22294ebdb793d0a03...
Extracting 921ee428be500cd1e4d98b48f09b5ca8...
Extracting a70b8c57a707ce65c8647f0432ffc4d6...
Extracting ae5f1056ab93146b69eb122896444734...
Extracting c29727a5adce09b6bb449e731cafc77b...
Extracting df29a6d2d8f8af1534dcd9282727bb5a...
Extracting e6774188e5722eaabe1c4b83aca49a32...
Extracting e858828bf21ce8b1d01255cb3f355885...
Extracting f00beb674b9ae1c79a7ee4d544aaa84d...


## Tag comparison

In [20]:
import pandas as pd
from pathlib import Path

in_dir = Path('outdir')
df = pd.DataFrame()


def _read_tags(path):
    """Return the set of non-empty, whitespace-stripped lines found in ``path``.

    Blank lines are skipped so that an empty string never ends up as a tag.
    """
    with open(path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return {tag for tag in stripped_lines if tag}


# One tag list per input file (the file names suggest one file per city).
tags_hd = _read_tags(in_dir / 'motorways_cols_Heidelberg.txt')
tags_ma = _read_tags(in_dir / 'motorways_cols_Mannheim.txt')
tags_lz = _read_tags(in_dir / 'motorways_cols_Lodz.txt')

In [21]:
# Tags that occur in all three tag sets, stored as one column of the table.
common_tags = tags_hd.intersection(tags_ma, tags_lz)
df['common_tags'] = pd.Series([*common_tags])

In [22]:
# Tags that occur in exactly one of the three tag sets
unique_tags_hd = tags_hd - tags_ma - tags_lz
unique_tags_ma = tags_ma - tags_hd - tags_lz
unique_tags_lz = tags_lz - tags_hd - tags_ma

unique_columns = {
    'unique_tags_hd': unique_tags_hd,
    'unique_tags_ma': unique_tags_ma,
    'unique_tags_lz': unique_tags_lz,
}

# Assigning a Series to a DataFrame column aligns on the existing index, so
# any values beyond the current index are silently dropped. Grow the index
# once to the longest column (never shrinking what is already in df) before
# adding the new columns; shorter columns are padded with NaN.
n_rows = max(len(df), *(len(tags) for tags in unique_columns.values()))
df = df.reindex(range(n_rows))
for column_name, tags in unique_columns.items():
    df[column_name] = pd.Series(list(tags))

In [24]:
def _sorted_column(col):
    """Return ``col`` sorted by value and renumbered from 0."""
    ordered = col.sort_values()
    return ordered.reset_index(drop=True)


# Sort every column independently of the others, then display the table.
df = df.apply(_sorted_column)
df

Unnamed: 0,common_tags,unique_tags_hd,unique_tags_ma,unique_tags_lz
0,bicycle,access:lanes,TMC:cid_58:tabcd_1:Class,cutting
1,bridge,bus:lanes,TMC:cid_58:tabcd_1:Direction,destination:int_ref
2,destination,cycleway:both,TMC:cid_58:tabcd_1:LCLversion,highway:category:pl
3,destination:lanes,fixme,TMC:cid_58:tabcd_1:LocationCode,highway:class:pl
4,destination:ref,highway:note,TMC:cid_58:tabcd_1:NextLocationCode,maxaxleload
...,...,...,...,...
66,,,turn,
67,,,turn:lanes:backward,
68,,,turn:lanes:forward,
69,,,width:lanes,


## Raster/array shape examiner

In [30]:
import tarfile
import rasterio as rio
import geopandas as gpd

# Cache directory that the raster/array cells below read from.
# NOTE(review): machine-specific absolute path, and a different location from
# the `root_dir` defined earlier in this notebook; adjust before running elsewhere.
data_dir = Path('/mnt/d/UniHeidelberg/Kurse/Masterarbeit/lulc-utility/cache')

In [27]:
# Read the whole built-up raster into memory and show the array's shape.
# presumably (bands, rows, cols) - TODO confirm against the rasterio docs
built_up_tiff = data_dir / 'osm/v3/built-up/4c23b4d2-41b0-0b7b-0c6b-99c7d29cdb64.tiff'
with rio.open(built_up_tiff) as src:
    data = src.read()

data.shape

(1, 1150, 1759)

In [32]:
# Print the shape of s2.tif read straight from the archive, without unpacking it to disk.
response_tar = data_dir / 'imagery/sentinel_hub/imagery_v1/1d1c73d043fa298d192b28bd38120f77/response.tar'
with tarfile.open(response_tar, 'r') as tar:
    # extractfile() returns None when the member is not a regular file (or a
    # link to one); fail with a clear message instead of passing None on.
    tif_file = tar.extractfile('s2.tif')
    if tif_file is None:
        raise ValueError(f's2.tif in {response_tar} is not a regular file')
    # Close the member handle together with the dataset opened on top of it.
    with tif_file, rio.open(tif_file) as src:
        print(src.shape)

(1266, 1285)
