# In [3]:
import numpy as np
import pandas as pd
import elapid
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
from sklearn.metrics import roc_auc_score
import rasterio
from rasterio.transform import from_bounds

# In [None]:
# Load the background and presence samples from CSV.
background_data = pd.read_csv('birds_2018/veldleeuwerik_background_arable.csv')
presence_data = pd.read_csv('birds_2018/veldleeuwerik_arable.csv')

# Drop incomplete rows from BOTH tables. The cleaned presence table must be
# assigned back to `presence_data`; otherwise rows with missing values would
# still reach model fitting.
background_data = background_data.dropna()
presence_data = presence_data.dropna()

# Rename the coordinate columns for consistency and drop the columns that
# must not be used as predictors.
background_data = background_data.rename(columns={'x': 'longitude', 'y': 'latitude'})
background_data = background_data.drop(columns=['Unnamed: 0'])

presence_data = presence_data.rename(columns={'x': 'longitude', 'y': 'latitude'})
presence_data = presence_data.drop(columns=['Unnamed: 0', 'abundance_'])

# Build the label vector: 1 for presence rows, 0 for background rows.
presence_labels = np.ones(len(presence_data))
background_labels = np.zeros(len(background_data))

# Stack presence rows first, then background rows; `labels` follows the same
# order, and `ignore_index=True` gives the combined table a fresh 0..n-1 index.
combined_data = pd.concat([presence_data, background_data], ignore_index=True)
labels = np.concatenate([presence_labels, background_labels])

# Predictor tables: every remaining column except the coordinates.
features = combined_data.drop(columns=['longitude', 'latitude'])
features_presences = presence_data.drop(columns=['longitude', 'latitude'])

# Fit a MaxEnt model on the stacked presence/background samples.
# NOTE(review): `categorical=[1]` presumably marks the feature column at
# position 1 as categorical -- confirm against the elapid documentation.
model = elapid.MaxentModel()
model.fit(features, labels, categorical=[1])

# Score every sample, and separately the presence-only samples.
predicted_probabilities = model.predict(features)
predicted_probabilities_presences = model.predict(features_presences)

# AUC computed on the same samples the model was fitted on.
auc_score = roc_auc_score(y_true=labels, y_score=predicted_probabilities)
print(f"Model AUC: {auc_score}")

# Omission rate: share of presence samples scored below the threshold.
threshold = 0.5  # predictions at or above this value count as "presence"
below_threshold = predicted_probabilities_presences < threshold
omission_rate = sum(below_threshold) / len(below_threshold)
print(f"Omission Rate: {omission_rate}")

# Map the predicted probability of every sample at its coordinates.
coord_columns = ['longitude', 'latitude']
test_data_with_coords = combined_data.iloc[features.index][coord_columns]
sample_points = gpd.points_from_xy(
    test_data_with_coords['longitude'],
    test_data_with_coords['latitude'],
)
gdf_test = gpd.GeoDataFrame(test_data_with_coords, geometry=sample_points, crs='EPSG:28992')

fig, ax = plt.subplots(figsize=(5, 5))

# Colour each point by its predicted probability.
gdf_test.plot(
    ax=ax,
    column=predicted_probabilities,
    cmap='RdYlBu_r',
    legend=True,
    markersize=5,
)

# Title, axis labels, and axis limits clipped to the study area.
ax.set(
    title='Maxent Predictions for suitability',
    xlabel='X',
    ylabel='Y',
    xlim=(168910, 194950),
    ylim=(513945, 535555),
)

# OpenStreetMap tiles as a backdrop, requested in the plot's CRS.
ctx.add_basemap(
    ax,
    crs='EPSG:28992',
    source=ctx.providers.OpenStreetMap.Mapnik,
    zoom=15,
)

# Bare expression: displays the figure when run as a notebook cell.
fig

# Study-area limits and raster cell size (in units of the EPSG:28992 CRS).
xmin, xmax, ymin, ymax = 168910, 194950, 513945, 535555
resolution = 10

# Raster dimensions in cells.
width = int((xmax - xmin) / resolution)
height = int((ymax - ymin) / resolution)

# Empty raster: NaN marks cells without a prediction.
raster_array = np.full((height, width), np.nan)

# Cell-centre coordinates of the grid (y runs top-down to match raster row
# order). Not needed for the fill below; kept for downstream use.
x_coords = np.linspace(xmin + resolution / 2, xmax - resolution / 2, width)
y_coords = np.linspace(ymax - resolution / 2, ymin + resolution / 2, height)

# Attach the predictions to the sample coordinates.
data_with_predictions = combined_data.copy()
data_with_predictions['predicted_probabilities'] = predicted_probabilities

# Fill the raster with the predictions (vectorised instead of iterrows()).
# floor() rather than int(): int() truncates toward zero, which would map
# points lying up to one cell outside the west/north edge into column/row 0.
# Bounds are checked on the float indices so NaN coordinates are skipped.
col_float = np.floor(
    (data_with_predictions['longitude'].to_numpy(dtype=float) - xmin) / resolution
)
row_float = np.floor(
    (ymax - data_with_predictions['latitude'].to_numpy(dtype=float)) / resolution
)
in_bounds = (
    (col_float >= 0) & (col_float < width)
    & (row_float >= 0) & (row_float < height)
)

cell_cols = col_float[in_bounds].astype(int)
cell_rows = row_float[in_bounds].astype(int)
cell_values = data_with_predictions['predicted_probabilities'].to_numpy(dtype=float)[in_bounds]

# Several samples can fall in one cell. NumPy does not guarantee which value
# survives a fancy-index assignment with repeated indices, so keep only the
# last sample per cell explicitly (same outcome as a row-by-row fill).
flat_cells = cell_rows * width + cell_cols
is_last = ~pd.Series(flat_cells).duplicated(keep='last').to_numpy()
raster_array[cell_rows[is_last], cell_cols[is_last]] = cell_values[is_last]

# Save as a single-band GeoTIFF; NaN is declared as the nodata value so
# GIS tools treat empty cells as missing.
transform = from_bounds(xmin, ymin, xmax, ymax, width, height)
with rasterio.open(
    'MaxEnt_outputs/MaxEnt_veldleeuwerik_arable.tif',
    'w',
    driver='GTiff',
    height=height,
    width=width,
    count=1,
    dtype=raster_array.dtype,
    crs='EPSG:28992',
    transform=transform,
    nodata=np.nan,
) as dst:
    dst.write(raster_array, 1)
    
# Partial dependence plots for the feature columns at positions 0-5,
# showing the averaged response (kind='average').
from sklearn.inspection import PartialDependenceDisplay, permutation_importance, partial_dependence
display = PartialDependenceDisplay.from_estimator(
    model,
    features,
    features=list(range(6)),
    kind='average',
)