# Nähste Wetterstation finden

In [55]:
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
from geopy.distance import geodesic
import threading

class WeatherStationFinder:
    def __init__(self, root):
        self.root = root
        self.df = None
        self.file_loaded = False  # Flag to track if the file has been loaded
        self.setup_ui()

    def setup_ui(self):
        self.root.title("Nearest Weather Station Finder")

        self.load_button = tk.Button(self.root, text="Load Data", command=self.load_excel_file)
        self.load_button.grid(row=0, column=0, columnspan=2, padx=10, pady=10)
        
        tk.Label(self.root, text="Latitude:").grid(row=1, column=0, padx=10, pady=10)
        self.entry_lat = tk.Entry(self.root)
        self.entry_lat.grid(row=1, column=1, padx=10, pady=10)
        self.entry_lat.insert(0, "52.5200")  # Default latitude (example)

        tk.Label(self.root, text="Longitude:").grid(row=2, column=0, padx=10, pady=10)
        self.entry_lon = tk.Entry(self.root)
        self.entry_lon.grid(row=2, column=1, padx=10, pady=10)
        self.entry_lon.insert(0, "13.4050")  # Default longitude (example)


        self.find_button = tk.Button(self.root, text="Find Nearest Station", command=self.find_nearest_station)
        self.find_button.grid(row=3, column=0, columnspan=2, padx=10, pady=10)

        self.result_label = tk.Label(self.root, text="Nearest Weather Station: ")
        self.result_label.grid(row=4, column=0, columnspan=2, padx=10, pady=10)

    def load_excel_file(self):
        print("Loading Excel file...")
        initial_file = "Standorte Winddaten .xlsx"
        file_path = filedialog.askopenfilename(initialfile=initial_file, filetypes=[("Excel files", "*.xlsx;*.xls")])
        if not file_path:
            messagebox.showerror("Error", "No file selected.")
            return
        
        try:
            print(f"File selected: {file_path}")
            self.df = pd.read_excel(file_path)
            if 'Geogr_Laenge' not in self.df.columns or 'Geogr_Breite' not in self.df.columns or 'SDO_Name' not in self.df.columns:
                messagebox.showerror("Error", "Invalid Excel format. Required columns are missing.")
                self.file_loaded = False
            else:
                self.file_loaded = True  # Set the flag indicating file is loaded
                print("Excel file loaded successfully.")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to read the Excel file. Error: {e}")
            self.file_loaded = False

    def find_nearest_station(self):
        print("Finding nearest station...")
        if not self.file_loaded:
            messagebox.showerror("Error", "Please load a valid Excel file first.")
            return

        # Get latitude and longitude from entries
        try:
            lat = float(self.entry_lat.get())
            lon = float(self.entry_lon.get())
            print(f"Latitude: {lat}, Longitude: {lon}")
        except ValueError:
            messagebox.showerror("Error", "Invalid input. Please enter valid numbers for latitude and longitude.")
            return

        # Run the calculation in a separate thread
        threading.Thread(target=self.calculate_nearest_station, args=(lat, lon)).start()

    def calculate_nearest_station(self, lat, lon):
        print("Calculating nearest station...")
        # Find the nearest station
        min_distance = float('inf')
        nearest_station = None
        nearest_station_lat = None
        nearest_station_lon = None
        for index, row in self.df.iterrows():
            station_coord = (row['Geogr_Breite'], row['Geogr_Laenge'])
            distance = geodesic((lat, lon), station_coord).kilometers
            if distance < min_distance:
                min_distance = distance
                nearest_station = row['SDO_Name']
                nearest_station_lat = row['Geogr_Breite']
                nearest_station_lon = row['Geogr_Laenge']

        # Update the result label in the main thread
        self.root.after(0, self.update_result_label, nearest_station, nearest_station_lon, nearest_station_lat)

    def update_result_label(self, nearest_station, nearest_station_lon, nearest_station_lat):
        print(f"Nearest station found: {nearest_station}")
        self.result_label.config(text=f"Nearest Weather Station: {nearest_station}\nLat: {nearest_station_lat}\tLon: {nearest_station_lon}")

if __name__ == "__main__":
    root = tk.Tk()
    app = WeatherStationFinder(root)
    root.mainloop()


Loading Excel file...
File selected: C:/Users/Caj/OneDrive/HTW/THW BBK RE im KatSchutz/Programme/Daten einlesen/Standorte Winddaten .xlsx
Excel file loaded successfully.
Finding nearest station...
Latitude: 52.52, Longitude: 13.405
Calculating nearest station...
Nearest station found: Berlin-Alexanderplatz


# Daten aus Wetterstationen zeigen 

In [71]:
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd

# Funktion, die aufgerufen wird, wenn der Button geklickt wird
def open_file():
    file_path = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv"), ("Excel Files", "*.xlsx")])
    if file_path:
        try:
            # DataFrame aus der ausgewählten Datei erstellen
            df = pd.read_csv(file_path)  # Für CSV-Dateien
            # df = pd.read_excel(file_path)  # Für Excel-Dateien

            # Hier alle möglichen Informationen aus dem DataFrame extrahieren
            num_rows = len(df)  # Anzahl der Datensätze
            size_mb = df.memory_usage(deep=True).sum() / (1024 * 1024)  # Größe der Daten in MB

            # Informationen als Label anzeigen
            info_label.config(text=f"Anzahl Datensätze: {num_rows}\nGröße der Daten: {size_mb:.2f} MB")
            print(df.head())
            df.info()  # Informationen über den DataFrame anzeigen (Datentypen, Nicht-Null-Werte, Speicherverbrauch usw.)
            df.describe()  # Statistische Zusammenfassung (z. B. Anzahl, Mittelwert, Standardabweichung, Minimum, Maximum)
            df.columns  # Liste der Spaltennamen anzeigen


        except Exception as e:
            messagebox.showerror("Fehler", f"Fehler beim Öffnen der Datei:\n{str(e)}")

# GUI initialisieren
root = tk.Tk()
root.title("Datei öffnen und Informationen anzeigen")

# Button zum Öffnen der Datei
open_button = tk.Button(root, text="Datei öffnen", command=open_file)
open_button.pack(pady=20)

# Label für die Informationen
info_label = tk.Label(root, text="")
info_label.pack(pady=10)

# Hauptloop der GUI starten
root.mainloop()


                    Produkt_Code               SDO_ID  Zeitstempel  Wert  \
OBS_DEU_PT10M_F_MN          1001  2023-11-01T00:00:00          0.7    11   
OBS_DEU_PT10M_F_MN          1001  2023-11-01T00:10:00          0.8    11   
OBS_DEU_PT10M_F_MN          1001  2023-11-01T00:20:00          1.1    11   
OBS_DEU_PT10M_F_MN          1001  2023-11-01T00:30:00          1.0    11   
OBS_DEU_PT10M_F_MN          1001  2023-11-01T00:40:00          0.9    12   

                    Qualitaet_Byte  Qualitaet_Niveau  
OBS_DEU_PT10M_F_MN               3               NaN  
OBS_DEU_PT10M_F_MN               3               NaN  
OBS_DEU_PT10M_F_MN               3               NaN  
OBS_DEU_PT10M_F_MN               3               NaN  
OBS_DEU_PT10M_F_MN               3               NaN  
<class 'pandas.core.frame.DataFrame'>
Index: 2343144 entries, OBS_DEU_PT10M_F_MN to OBS_DEU_PT10M_F_MN
Data columns (total 6 columns):
 #   Column            Dtype  
---  ------            -----  
 0   Produkt_Cod

In [9]:
import pandas as pd

# DataFrame aus einer CSV-Datei laden
df = pd.read_csv('test Datein/testWetterstationen.csv')

# Statistische Zusammenfassung anzeigen
print(df.describe())



# Durchschnitt für jede Kategorie berechnen
mean_by_category = df.groupby('Kategorie').mean()

# Histogramm für eine Spalte erstellen und anzeigen
df['Spalte3'].plot(kind='hist', bins=20, title='Histogramm von Spalte3')
plt.xlabel('Werte')
plt.ylabel('Anzahl')
plt.show()


ParserError: Error tokenizing data. C error: Expected 2 fields in line 3, saw 3


In [80]:
import pandas as pd

# Pfad zur CSV-Datei
csv_file = 'data3Stationen.csv'

# Daten aus der CSV-Datei laden
data = pd.read_csv(csv_file)

# Pivot-Tabelle erstellen, um die Daten umzuformen
pivot_table = data.pivot(index='SDO_ID', columns='Zeitstempel', values='Wert')

# Neue Spaltennamen erstellen, indem '_Wert' an SDO_ID angehängt wird
new_columns = [f'{col}_Wert' for col in pivot_table.columns]

# Spaltenüberschriften aktualisieren
pivot_table.columns = new_columns

# Daten in eine Excel-Datei speichern
excel_file = 'pfad/zum/ausgabedatei.xlsx'
pivot_table.to_excel(excel_file, index=True, engine='openpyxl')

print(f'Daten wurden erfolgreich in {excel_file} gespeichert.')


ValueError: Index contains duplicate entries, cannot reshape

In [1]:
import pandas as pd

# Lese die CSV-Datei in ein pandas DataFrame
df = pd.read_csv('test3stationene.csv')

# Erstelle ein leeres DataFrame für die aggregierten Daten
df_aggregated = pd.DataFrame()

# Iteriere über eindeutige SDO_IDs, um Daten zu gruppieren
for sdo_id in df['SDO_ID'].unique():
    # Filtere Daten für die aktuelle SDO_ID
    subset = df[df['SDO_ID'] == sdo_id][['Zeitstempel', 'Wert']]
    
    # Benenne die Spalten um entsprechend der SDO_ID
    subset.columns = [f'{sdo_id}_Zeitstempel', f'{sdo_id}_Wert']
    
    # Füge die subset Daten zum aggregierten DataFrame hinzu
    df_aggregated = pd.concat([df_aggregated, subset], axis=1)

# Speichere das aggregierte DataFrame in eine Excel-Datei
output_file = 'aggregierte_daten.xlsx'
df_aggregated.to_excel(output_file, index=False, engine='openpyxl')

print(f'Die aggregierten Daten wurden in "{output_file}" gespeichert.')


ParserError: Error tokenizing data. C error: Expected 2 fields in line 3, saw 3


In [13]:
import csv

with open('test Datein/testWetterstationen.csv', 'r') as file:
    try:
        reader = csv.reader(file)
        for row in reader:
            # Process each row
            print(row)
    except csv.Error as e:
        print(f'Error while parsing CSV: {e}')


['Produkt_Code;SDO_ID;Zeitstempel;Wert']
['OBS_DEU_PT10M_F;1975;44562;6', '1']
['OBS_DEU_PT10M_F;1975;44562', '00694;5', '5']
['OBS_DEU_PT10M_F;1975;44562', '01389;5', '2']
['OBS_DEU_PT10M_F;1975;44562', '02083;5']
['OBS_DEU_PT10M_F;1975;44562', '02778;4', '9']
['OBS_DEU_PT10M_F;1975;44562', '03472;5', '1']
['OBS_DEU_PT10M_F;1975;44562', '04167;5', '5']
['OBS_DEU_PT10M_F;1975;44562', '04861;5', '6']
['OBS_DEU_PT10M_F;1975;44562', '05556;5', '4']
['OBS_DEU_PT10M_F;1975;44562', '0625;4', '7']
['OBS_DEU_PT10M_F;1975;44562', '06944;4', '8']
['OBS_DEU_PT10M_F;1975;44562', '07639;5']
['OBS_DEU_PT10M_F;1975;44562', '08333;4', '2']
['OBS_DEU_PT10M_F;1975;44562', '09028;5']
['OBS_DEU_PT10M_F;1975;44562', '09722;5']
['OBS_DEU_PT10M_F;1975;44562', '10417;4', '9']
['OBS_DEU_PT10M_F;1975;44562', '11111;5', '3']
['OBS_DEU_PT10M_F;1975;44562', '11806;5', '2']
['OBS_DEU_PT10M_F;433;44562;5', '5']
['OBS_DEU_PT10M_F;433;44562', '00694;5', '1']
['OBS_DEU_PT10M_F;433;44562', '01389;4', '5']
['OBS_DEU_PT10

In [15]:
import csv

with open('data3Stationen.csv', 'r') as file:
    try:
        reader = csv.reader(file)
        for row in reader:
            # Process each row
            print(row)
    except csv.Error as e:
        print(f'Error while parsing CSV: {e}')


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



['OBS_DEU_PT10M_F', '4642', '2023-12-05T18:40:00', '4.9', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T18:50:00', '4.7', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T19:00:00', '4.6', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T19:10:00', '4.4', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T19:20:00', '4.6', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T19:30:00', '4.1', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T19:40:00', '3.7', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T19:50:00', '3.4', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T20:00:00', '3.7', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T20:10:00', '3.8', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T20:20:00', '3.7', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T20:30:00', '4.1', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-05T20:40:00', '4.4', '111', '3', '']
['OBS_DEU_PT10M_F', '4642', '2023-12-0