In [82]:

import h5py
from tqdm import tqdm
import os
from src.grain_classification import create_mask_dataset as mask_code
import pandas as pd
import geopandas as gpd


# Name of this run; used below as both the output sub-directory and the
# file-name prefix of the mask file.
runtime_name = "testing"
# Root folder of the crop-classification data, relative to this notebook.
data_path = "../../kornmo-data-files/raw-data/crop-classification-data"
# Crop categories a mask is produced for. "rug_og_rughvete" is a combined
# category: create_crop_specific_mask matches it against both the "rug" and
# the "rughvete" labels.
crop_types = ['hvete', 'bygg', 'havre', 'rug_og_rughvete']


# Path of the HDF5 file that receives the generated masks.
masks_path = f"{data_path}/{runtime_name}/{runtime_name}_masks.h5"
# NOTE(review): presumably creates/initialises the mask file at this path so
# that insert_mask can write into it later - confirm against mask_code.
mask_code.create_mask_file(masks_path)

# Field table. Rows are looked up by field id through `.loc`, and the 'area'
# and 'geometry' columns are read from them.
all_fields = gpd.read_file(f"{data_path}/all_data.gpkg")

# Predicted crop label per field; the columns read in this notebook are
# 'orgnr', 'year', 'crop_type' and 'field_id'.
all_predicted_labels = pd.read_csv(f"{data_path}/{runtime_name}/predicted_labels.csv")

# Farm bounding boxes; the columns read in this notebook are 'orgnr', 'year'
# and 'geometry'.
all_bounding_boxes = gpd.read_file('../../kornmo-data-files/raw-data/farm-information/farm-properties/bounding-boxes-previous-students/disponerte_eiendommer_bboxes.shp')




In [83]:

def create_crop_specific_mask(orgnum, year, crop_type):
    """Build the mask and summed field area for one farm, year and crop type.

    Reads the module-level tables ``all_predicted_labels``,
    ``all_bounding_boxes`` and ``all_fields``.

    Args:
        orgnum: Farm organisation number; converted with ``int()`` before it
            is compared against the ``orgnr`` columns.
        year: Year; converted with ``int()`` before it is compared against
            the ``year`` columns.
        crop_type: Crop label to select. ``"rug_og_rughvete"`` selects fields
            labelled either ``"rug"`` or ``"rughvete"``.

    Returns:
        ``(mask, total_area)`` where ``mask`` is whatever
        ``mask_code.generate_mask_image`` returns and ``total_area`` is the
        sum of the ``area`` values of the selected fields. Returns ``(0, 0)``
        when no field matches or when the farm has no bounding box.
    """
    farm_id = int(orgnum)
    season = int(year)

    # The combined category covers two separate labels in the predictions table.
    if crop_type == "rug_og_rughvete":
        crop_matches = all_predicted_labels['crop_type'].isin(["rug", "rughvete"])
    else:
        crop_matches = all_predicted_labels['crop_type'] == crop_type

    selected = all_predicted_labels[
        (all_predicted_labels['orgnr'] == farm_id)
        & (all_predicted_labels['year'] == season)
        & crop_matches
    ]
    field_ids = selected["field_id"].tolist()
    if not field_ids:
        return 0, 0

    farm_boxes = all_bounding_boxes[all_bounding_boxes['orgnr'] == farm_id]
    if len(farm_boxes) < 1:
        print(f"Found satellite images for {orgnum}, but no bounding boxes")
        return 0, 0

    # Use the box registered for this exact year when there is exactly one;
    # otherwise fall back to the first box known for the farm.
    year_box = farm_boxes[farm_boxes['year'] == season]
    if len(year_box) != 1:
        year_box = farm_boxes.head(1)

    box_polygon = mask_code.convert_crs(year_box['geometry'])[0]
    centre = box_polygon.centroid
    extent = mask_code.boundingBox(centre.y, centre.x, 1)
    bbox = mask_code.box(extent[0], extent[1], extent[2], extent[3])

    field_rows = [all_fields.loc[field_id] for field_id in field_ids]
    total_area = sum(field_row['area'] for field_row in field_rows)
    field_geometry = [field_row['geometry'] for field_row in field_rows]

    # NOTE(review): only element [0] of the converted geometries is drawn into
    # the mask while total_area covers every selected field - confirm that
    # convert_crs merges the inputs, otherwise additional fields are ignored.
    field_polygon = mask_code.convert_crs(field_geometry)[0]
    mask = mask_code.generate_mask_image(bbox, field_polygon)

    return mask, total_area



In [84]:

# Directory holding the Sentinel image archives. The default is the original
# machine-specific location; set SATELLITE_IMAGE_DIR to run on another machine.
satellite_image_location = os.environ.get("SATELLITE_IMAGE_DIR", "E:/MasterThesisData/Satellite_Images/")
area_columns = ['orgnr', 'year', 'crop_type', 'area']
# One dict per generated mask. The DataFrame is built once after the loops
# instead of being re-allocated with pd.concat for every row.
area_records = []
# Number of [farm, year, crop type] combinations for which no mask was produced.
counter = 0

for filename in ['sentinel_100x100_0.h5', 'sentinel_100x100_1.h5']:
    # The context manager closes the archive; no explicit close() is needed.
    with h5py.File(os.path.join(satellite_image_location, filename), "r") as file:

        images = file['images']
        for orgnum in tqdm(images.keys(), total=len(images)):
            for year in images[orgnum]:
                for crop_type in crop_types:

                    new_crop_mask, area = create_crop_specific_mask(orgnum, year, crop_type)

                    # create_crop_specific_mask signals "nothing to draw" with
                    # the integer 0. Test for that sentinel explicitly: a plain
                    # `mask != 0` is an elementwise comparison for array-like
                    # masks and cannot be used as an `if` condition.
                    if isinstance(new_crop_mask, int) and new_crop_mask == 0:
                        counter += 1
                        continue

                    mask_code.insert_mask(masks_path, f"{int(orgnum)}/{int(year)}/{crop_type}", new_crop_mask)
                    area_records.append({'orgnr': int(orgnum), 'year': int(year), 'crop_type': crop_type, 'area': area})

# Passing `columns` keeps the expected header even when no mask was generated.
# The index written to the CSV is now 0..n-1 (previously every row carried
# index label 0 as a by-product of the per-row concat).
field_areas = pd.DataFrame(area_records, columns=area_columns)
field_areas.to_csv(f"{data_path}/{runtime_name}/field_areas.csv")

print("Done")
print(f"Skipped {counter} sets of [farm, year, type]")



100%|██████████| 4135/4135 [00:31<00:00, 130.50it/s]
100%|██████████| 3477/3477 [00:22<00:00, 156.27it/s]

Done
Skipped 67979 sets of [farm, year, type]



