In [None]:
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
from scipy.spatial import cKDTree

from typing import Tuple

In [None]:
# Load the raw incident reports; the path is relative to the notebook's directory.
incident_csv_path = "../data/Locker_Meldungen_Aufträge.csv"
incident_data = pd.read_csv(incident_csv_path)

In [None]:
# Preview the first five rows of the incident table.
incident_data.head(n=5)

In [None]:
# Read the CSV file that maps postal codes to coordinates.
# Each data row is unpacked as (postal code, latitude, longitude); the postal
# code is kept as the exact string found in the file and used as the dict key.
plz_koordinaten = {}
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for file objects passed to csv.reader.
with open('../data/plz_geocoord.csv', mode='r', newline='') as infile:
    reader = csv.reader(infile)
    next(reader, None)  # Skip the header row; the default avoids StopIteration on an empty file
    for rows in reader:
        if not rows:
            # csv.reader yields an empty list for blank lines (e.g. a trailing
            # newline at the end of the file); there is nothing to unpack.
            continue
        plz, lat, lon = rows
        plz_koordinaten[plz] = (float(lat), float(lon))

In [None]:
# Function for looking up the coordinates of a postal code
def get_coordinates(plz: str) -> Tuple[float, float]:
    """Return the ``(lat, lon)`` pair stored for a postal code.

    The keys of ``plz_koordinaten`` are strings read by ``csv.reader``, while a
    postal-code column loaded with ``pd.read_csv`` is parsed as int (or float
    when the column has gaps) if every value is numeric. A plain ``dict.get``
    then never matches, so the value is normalised before giving up.

    Parameters
    ----------
    plz : str
        Postal code. Integer and integral float values are accepted as well.

    Returns
    -------
    Tuple[float, float]
        The stored ``(lat, lon)``, or ``(nan, nan)`` when the code is missing
        or unknown.
    """
    missing = (np.nan, np.nan)

    # Missing values can never be a key.
    if plz is None or (isinstance(plz, float) and np.isnan(plz)):
        return missing

    # Fast path: the value already matches a key exactly.
    if plz in plz_koordinaten:
        return plz_koordinaten[plz]

    # 1067.0 -> 1067, so that the string form below does not carry a ".0".
    if isinstance(plz, float) and plz.is_integer():
        plz = int(plz)

    key = str(plz).strip()
    # Numeric parsing drops leading zeros ("01067" -> 1067); restore them.
    # NOTE(review): assumes 5-digit postal codes (German PLZ) - confirm.
    for candidate in (key, key.zfill(5)):
        if candidate in plz_koordinaten:
            return plz_koordinaten[candidate]

    return missing

# Add new columns for latitude and longitude.
# The (lat, lon) tuples are expanded in one DataFrame construction instead of
# `.apply(pd.Series)`, which builds a separate Series object for every row.
_coordinate_pairs = incident_data['Postleitzahl'].map(get_coordinates).tolist()
incident_data[['lat', 'lon']] = pd.DataFrame(
    _coordinate_pairs,
    index=incident_data.index,  # keep row alignment with incident_data
    columns=['lat', 'lon'],
)

In [None]:
# Count the incidents recorded at each distinct (lat, lon) pair and turn the
# grouped result back into a flat table with a 'count' column.
incidents_per_location = incident_data.groupby(['lat', 'lon']).size()
equipment = incidents_per_location.reset_index(name='count')

equipment.info()

In [None]:
# Create a regular grid of latitude and longitude values spanning the bounding
# box of the incident locations.
GRID_POINTS = 100  # number of grid nodes along each axis
lat_values = np.linspace(equipment['lat'].min(), equipment['lat'].max(), GRID_POINTS)
lon_values = np.linspace(equipment['lon'].min(), equipment['lon'].max(), GRID_POINTS)
lat_grid, lon_grid = np.meshgrid(lat_values, lon_values)

# Linearly interpolate the per-location incident counts onto the grid.
count_grid = griddata(
    (equipment['lat'], equipment['lon']),
    equipment['count'],
    (lat_grid, lon_grid),
    method='linear',
)

# Filled contour plot of the interpolated counts. Latitude is on the x-axis and
# longitude on the y-axis, matching the weather-score plot in this notebook.
fig, ax = plt.subplots(figsize=(5, 5))
contour = ax.contourf(
    lat_grid,
    lon_grid,
    count_grid,
    cmap='coolwarm'
)
fig.colorbar(contour, ax=ax, label='Incident count')
ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')
ax.set_title('Incident count Contour Plot')

plt.show()

In [None]:
# Persist the per-location incident counts.
# The path is handed to pandas directly: a text handle opened without
# newline='' makes to_csv emit blank lines between rows on Windows, whereas
# pandas opens and closes the file correctly when given a path.
equipment.to_csv('../data/incident_data.csv', index=False)

In [None]:
# Load the weather scores; the path is relative to the notebook's directory.
weather_scores_path = '../data/weather_scores.csv'
weather_csv = pd.read_csv(weather_scores_path)

In [None]:
# Wrap the weather scores (read from CSV in the previous cell) in a DataFrame
weather_data = pd.DataFrame(weather_csv)

# Interpolate the weather score onto the lat/lon grid (for visualization purposes)
weather_points = (weather_data['lat'], weather_data['lon'])
weather_values = weather_data['weather_score_pca']
weather_grid = griddata(weather_points, weather_values, (lat_grid, lon_grid), method='linear')

# Filled contour plot of the interpolated weather score
fig, ax = plt.subplots(figsize=(5, 5))
weather_contour = ax.contourf(lat_grid, lon_grid, weather_grid, cmap='coolwarm')
fig.colorbar(weather_contour, ax=ax, label='Weather score')
ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')
ax.set_title('Weather score Contour Plot')
plt.show()

# Index the weather sample coordinates in a KD-tree so nearest-neighbour
# queries are fast. Distances are computed directly on the raw lat/lon values.
weather_coordinates = weather_data[['lat', 'lon']]
tree = cKDTree(weather_coordinates)

# Define a function to interpolate weather data based on latitude and longitude
def interpolate_data(latitude, longitude, tree_object, data, k=3, value_column='weather_score_pca'):
    """Estimate a value at ``(latitude, longitude)`` by inverse distance weighting.

    The ``k`` nearest neighbours of the query point are looked up in
    ``tree_object`` and their ``value_column`` entries are averaged with
    weights ``1 / distance``.

    Parameters
    ----------
    latitude, longitude : float
        Query coordinates, in the same units as the points stored in the tree.
    tree_object : scipy.spatial.cKDTree
        Tree whose point order matches the row order of ``data`` (rows are
        selected positionally from the indices the tree returns).
    data : pandas.DataFrame
        Table holding ``value_column``.
    k : int, default 3
        Number of nearest neighbours to use.
    value_column : str, default 'weather_score_pca'
        Column of ``data`` to interpolate.

    Returns
    -------
    float
        The neighbour's value when the query point coincides with a stored
        point, otherwise the inverse-distance-weighted mean. ``nan`` when a
        coordinate is NaN, when no neighbour exists, or when a distance is NaN.
    """
    # A NaN coordinate cannot be queried meaningfully; bail out before the tree
    # lookup instead of relying on the distances it would return.
    if np.isnan(latitude) or np.isnan(longitude):
        return np.nan

    dist, idx = tree_object.query([latitude, longitude], k=k)  # Find the k nearest neighbors

    # With k=1 the query returns scalars; promote them so the array logic below
    # works for every k.
    dist = np.atleast_1d(dist)
    idx = np.atleast_1d(idx)

    # If the tree holds fewer than k points, the missing neighbours come back
    # with infinite distance and an index equal to the tree size, which would be
    # out of bounds for `data`. Drop them.
    found = ~np.isinf(dist)
    if not found.any():
        return np.nan
    dist = dist[found]
    idx = idx[found]

    # Positional lookup of the neighbours' values
    nearest_score = data[value_column].iloc[idx].to_numpy()

    # Handle cases where distance is zero (point is exactly at a stored
    # location): return that value directly and avoid dividing by zero.
    exact = dist == 0
    if exact.any():
        return nearest_score[exact][0]

    if np.any(np.isnan(dist)):
        return np.nan

    # Inverse distance weighting
    weights = 1 / dist
    interpolated_data = np.dot(weights, nearest_score) / np.sum(weights)
    return interpolated_data

# Attach an interpolated weather score to every incident location; a row with a
# missing latitude or longitude gets NaN instead of being interpolated.
def _weather_score_for_row(row):
    if np.isnan(row['lat']) or np.isnan(row['lon']):
        return np.nan
    return interpolate_data(row['lat'], row['lon'], tree, weather_data)

equipment['weather_score'] = equipment.apply(_weather_score_for_row, axis=1)

In [None]:
# Correlation between the incident count and the interpolated weather score of
# each location (Series.corr computes the Pearson coefficient by default).
incident_counts = equipment['count']
weather_scores = equipment['weather_score']
pearson_corr = incident_counts.corr(weather_scores)

print(pearson_corr)

In [None]:
# Load the equipment table and print its column/dtype summary.
equipment_csv_path = '../data/equipment.csv'
eq = pd.read_csv(equipment_csv_path)
eq.info()

In [None]:
# Print every distinct value of the 'EquiArt' column, one per line.
equi_art_column = eq['EquiArt']
unique_EquiArt = equi_art_column.unique()
for equi_art in unique_EquiArt:
    print(equi_art)