# Purpose

- Converts the geospatial data within `stamdata_2022.xlsx` from `UTM 32N` to `Lat/Lng`.
- Converts the `.gpkg` file to CSV.

### Input files

TODO

### Output files
- `wind_turbines.csv`
- `wind_parks.json`

In [70]:
# File processing libraries
import zipfile
import pathlib
    
# Data processing libraries
import pandas as pd

# Geospatial libraries
import geopandas as gpd
import shapely
import pyproj
#import utm

# Network libraries
from urllib.request import urlretrieve

In [77]:
# Remote location of the zipped wind turbine master data and its local file name
wind_master_data_location = "https://ens.dk/sites/ens.dk/files/Analyser/stamdata_eksisterende_moeller_2023_01_etrs1989_utm32n.zip"
wind_master_data_filename = "stamdata_eksisterende_moeller_2023_01_etrs1989_utm32n.zip"
# Archive name without its extension; used below as the path of the extracted data
wind_master_data_folder = wind_master_data_filename.split('.')[0]

archive_path = pathlib.Path(wind_master_data_filename)

# Download the archive only when no local copy exists
if not archive_path.exists():
    # Download to a temporary name first so that an interrupted transfer
    # never leaves a truncated file under the final archive name.
    partial_path = archive_path.with_name(archive_path.name + '.part')
    urlretrieve(wind_master_data_location, partial_path)
    partial_path.replace(archive_path)

# Extract independently of the download: an archive that is already on disk
# but was never unpacked still needs to be extracted.
if not pathlib.Path(wind_master_data_folder).exists():
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall('.')

In [91]:
# Load data
# Reads the extracted master data path with geopandas; the result is used
# below as a table with a `geometry` column.
turbines = gpd.read_file(wind_master_data_folder)

In [92]:
# Convert geometry from UTM 32N to Lat/Lng
# Import the submodule explicitly: a bare `import shapely` does not guarantee
# that `shapely.ops` has been loaded.
import shapely.ops

wgs84 = pyproj.CRS('EPSG:4326')
utm = pyproj.CRS('EPSG:25832')
# always_xy=True fixes the axis order to (x, y), i.e. (easting, northing) in
# and (lng, lat) out, regardless of the axis order declared by each CRS.
project = pyproj.Transformer.from_crs(utm, wgs84, always_xy=True).transform
# Reproject every row's geometry; the result is kept separate from `turbines`
new_geo = turbines.apply(lambda row: shapely.ops.transform(project, row.geometry), axis=1)

In [121]:
# TODO - Remove?
# Disabled experiment (never executed because of `if False`): tag each turbine
# with a construction decade based on its yearly `Y<year>` columns.
if False:
    # Group years into decades
    turbines['construction_decade'] = 1900
    turbines.fillna(0, inplace=True)

    # Yearly columns of the 1970s, matching the '1970s' label assigned below
    cols = [c for c in turbines.columns if 'Y197' in c]
    # Row-wise boolean mask (one entry per turbine): True when the turbine has
    # a non-zero total across the selected year columns. Summing over axis=1
    # is required so the mask aligns with the row index used by `.loc`.
    # NOTE(review): "non-zero total" as the decade criterion is an assumption
    # about the intent — confirm before enabling this cell.
    mask = turbines.loc[:, cols].astype(float).sum(axis=1) > 0
    turbines.loc[mask, 'construction_decade'] = '1970s'

Unnamed: 0,Y1977,Y1978,Y1979
0,0.0,0,0
1,0.0,0,0
2,0.0,0,0
3,0.0,0,0
4,0.0,0,0
...,...,...,...
6145,0.0,0,0
6146,0.0,0,0
6147,0.0,0,0
6148,0.0,0,0


In [125]:
# Expose the reprojected coordinates as plain numeric columns
turbines['lat'] = new_geo.geometry.y
turbines['lng'] = new_geo.geometry.x

# Keep only the columns of interest to reduce the size of the saved file
turbines = turbines[
    [
        'GSRN',
        'Kapacitet_',
        'Rotordiame',
        'Navhøjde_',
        'Fabrikat',
        'Y2022_10',
        'Y2022_11',
        'Y2022_12',
        'lat',
        'lng',
        'geometry',
    ]
]

In [126]:
# Write the reduced turbine table to `turbines.geojson` using the GeoJSON driver.
# NOTE(review): the `geometry` column kept above is never reassigned in this
# notebook, so it presumably still holds the source (UTM) coordinates while
# `lat`/`lng` hold the converted ones — confirm this is intended.
turbines.to_file('turbines.geojson', driver="GeoJSON")