In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
from tqdm import tqdm
from shapely.geometry import box, Polygon
from shapely.ops import transform
from shapely import wkt
import matplotlib.pyplot as plt
import os, sys
import pyproj

#Enables imports from src directory in notebooks
sys.path.insert(0, os.path.abspath('../src'))
sys.path.insert(0, os.path.abspath('../src/data_handling'))
#Auto update imports when python files in src is updated
%load_ext autoreload
%autoreload 2

from satellite_images import read_sat_images_file
from utils import boundingBox, write_polygons_to_shp, plot_polygons, png_to_geotiff


data_location = "../../kornmo-data-files/raw-data"


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [71]:
def get_farmer_centroid(nrows=None):
    farmer_centroid = pd.read_csv(os.path.join(data_location, 'farm-information/centroid_coordinates_new.csv'), delimiter=',', nrows=nrows)
    columns_to_keep = ['orgnr', 'latitude', 'longitude', 'kommunenr']
    farmer_centroid = farmer_centroid.filter(columns_to_keep)
    farmer_centroid = farmer_centroid.dropna()
    farmer_centroid['orgnr'] = farmer_centroid['orgnr'].astype(str)
    farmer_centroid['latitude'] = farmer_centroid['latitude'].astype(float)
    farmer_centroid['longitude'] = farmer_centroid['longitude'].astype(float)
    farmer_centroid['kommunenr'] = farmer_centroid['kommunenr'].astype(int)
    return farmer_centroid

def get_polygon_data(nrows=None):
    
    soilquality = pd.read_csv(os.path.join(data_location, 'soil-data/jordsmonn_geometry.csv'), dtype=str, nrows=nrows)
    soilquality = soilquality.dropna()
    soilquality['geometry'] = soilquality['geometry'].apply(wkt.loads)
    geo_soilquality = gpd.GeoDataFrame(soilquality, crs='epsg:4326')
    geo_soilquality['municipal_nr'] = geo_soilquality['municipal_nr'].astype(int)
    
    return geo_soilquality

def get_disp_eiendommer():
    disp_eien = gpd.read_file(os.path.join(data_location, 'farm-information/farm-properties/disposed-properties-previous-students/disponerte_eiendommer.gpkg'), layer='disponerte_eiendommer')
    disp_eien = disp_eien.dropna()
    disp_eien.drop_duplicates(['orgnr', 'geometry'], keep='first', inplace=True)
    disp_eien['orgnr'] = disp_eien['orgnr'].astype(str)

    return disp_eien

def get_combined_satellite_data():
    sat_images0 = read_sat_images_file('sentinel_100x100_0.h5')
    sat_images1 = read_sat_images_file('sentinel_100x100_1.h5')
    in_first = set(sat_images0)
    in_second = set(sat_images1)

    in_second_but_not_in_first = in_second - in_first

    result = list(sat_images0) + list(in_second_but_not_in_first)

    return result


def filter_by_municipal(dataframe, municipal_nr):
    return dataframe.loc[dataframe['municipal_nr'] == municipal_nr]


def get_polygons_by_municipal(dataframe, municipal_nr):
    polygons_by_muni = dataframe.loc[dataframe['municipal_nr'] == municipal_nr]
    polygons_list = polygons_by_muni['geometry'].tolist()
    return polygons_list

def convert_crs(polygons):
    project = pyproj.Transformer.from_proj(pyproj.Proj('epsg:25833'), pyproj.Proj('epsg:4326'), always_xy=True)
    return [transform(project.transform, poly) for poly in polygons]



In [26]:
print("Retrieving data")
disp_eien = get_disp_eiendommer()
field_data = get_polygon_data()
farmer_centroid = get_farmer_centroid()

sat_orgnr = np.array(get_combined_satellite_data())


Retrieving disposed properties data, satellite data and polygon data


100%|██████████| 4135/4135 [00:00<00:00, 38365.45it/s]
100%|██████████| 3477/3477 [00:00<00:00, 33932.38it/s]


In [27]:
farm_orgnr = np.array(list(disp_eien['orgnr']))
intersection = np.intersect1d(sat_orgnr, farm_orgnr)

filtered_disp_eien = disp_eien[disp_eien['orgnr'].isin(intersection)]
filtered_satellite_data = intersection[:]

print(f"Amount of fields from disposed properties: {filtered_disp_eien.shape}")
print(f"Amount of organisation numbers from satellite data: {len(filtered_satellite_data)}")
print(f"Amount of fields from jordsmonn: {field_data.shape}")



municipal_nrs = []
idxs_to_remove = []
orgnrs_to_check = list(set(farmer_centroid['orgnr'].tolist()))
for index, row in tqdm(filtered_disp_eien.iterrows(), total=filtered_disp_eien.shape[0]):
    if row['orgnr'] in orgnrs_to_check:
        municipal_nrs.append(farmer_centroid.loc[farmer_centroid['orgnr'] == row['orgnr']]['kommunenr'].iloc[0])
    else:
        idxs_to_remove.append(index)
    
filtered_disp_eien = filtered_disp_eien.drop(idxs_to_remove)



Amount of fields from disposed properties: (17884, 3)
Amount of organisation numbers from satellite data: 6890
Amount of fields from jordsmonn: (629040, 4)


100%|██████████| 17884/17884 [00:12<00:00, 1402.25it/s]


In [32]:
filtered_disp_eien.insert(1, "municipal_nr", municipal_nrs)
print(filtered_disp_eien.shape)
filtered_disp_eien.head()

(16857, 4)


Unnamed: 0,orgnr,municipal_nr,year,geometry
0,969102404,3812,2017,"MULTIPOLYGON (((199287.149 6584342.538, 199295..."
1,983375782,3028,2017,"MULTIPOLYGON (((284208.764 6624834.683, 284208..."
3,983375782,3028,2019,"MULTIPOLYGON (((284208.764 6624834.683, 284208..."
6,983375782,3028,2018,"MULTIPOLYGON (((284208.764 6624834.683, 284208..."
11,971214074,3015,2019,"MULTIPOLYGON (((283304.242 6602938.198, 283294..."


In [36]:
field_data.head()

Unnamed: 0.1,Unnamed: 0,id,municipal_nr,geometry
0,0,0,3031,"MULTIPOLYGON (((271721.06120 6664057.75200, 27..."
1,1,1,3031,"MULTIPOLYGON (((271483.64850 6664134.17310, 27..."
2,2,2,3031,"MULTIPOLYGON (((271708.79830 6664057.24050, 27..."
3,3,3,3031,"MULTIPOLYGON (((271314.24970 6664101.64960, 27..."
4,4,4,3031,"MULTIPOLYGON (((271293.07800 6664065.84230, 27..."


In [45]:
intersections_df = []
municipal_nrs = filtered_disp_eien['municipal_nr'].tolist()

for municipal_nr in tqdm(municipal_nrs):

    filtered_disp = filter_by_municipal(filtered_disp_eien, municipal_nr)
    filtered_fields = filter_by_municipal(field_data, municipal_nr)

    polygons_fields = filtered_fields['geometry'].tolist()
    polygons_disp = filtered_disp['geometry'].tolist()
    id_fields = filtered_fields['id'].tolist()
    orgnr_disp = filtered_disp['orgnr'].tolist()
    
    for i, poly_disp in enumerate(polygons_disp):
        for j, poly_field in enumerate(polygons_fields):
            if poly_disp.intersects(poly_field):
                new_row = [orgnr_disp[i], municipal_nr, id_fields[j]]
                intersections_df.append(new_row)

    break

intersections_df = pd.DataFrame(intersections_df, columns=['orgnr', 'municipal_nr', 'field_id'])
intersections_df.to_csv(os.path.join(data_location, 'farm-information/fields_per_farm2.csv'))
print(intersections_df.head())
print(intersections_df.shape)

  0%|          | 0/16857 [00:00<?, ?it/s]

       orgnr  municipal_nr field_id
0  969102404          3812   192482
1  969102404          3812   192484
2  969102404          3812   192485
3  969102404          3812   192486
4  969102404          3812   192487
(1298, 3)





In [58]:
disp_poly = filtered_disp_eien.loc[filtered_disp_eien['municipal_nr'] == 3812]
field_poly = field_data.loc[field_data['municipal_nr'] == 3812]
#field_poly = field_data[field_data["id"].isin(intersections_df['field_id'].tolist())]
field_poly = field_data[field_data["id"].isin(['193825'])]
print(disp_poly.shape)
print(field_poly.shape)

disp_poly = disp_poly['geometry'].tolist()
field_poly = field_poly['geometry'].tolist()

write_polygons_to_shp(convert_crs(disp_poly), 'disp')
write_polygons_to_shp(convert_crs(field_poly), 'fields')

(42, 4)
(1, 4)
