In [1]:
import fastapi
import pandas as pd
import numpy as np
from utils.Database import Database
import os
import json
from dotenv import load_dotenv

# ------------ Initialize Global Variables ------------


# .env-Datei laden
load_dotenv()

# Werte abrufen
ABSOLUTE_END_DATE = os.getenv("ABSOLUTE_END_DATE")  # als String
START_DATE = os.getenv("START_DATE")
END_DATE = os.getenv("END_DATE")

# JSON-String in ein Dictionary umwandeln
BBOX = json.loads(os.getenv("BBOX"))

OUTPUT_FILENAME = os.getenv("OUTPUT_FILENAME")
COORDINATE_ROUNDING = int(os.getenv("COORDINATE_ROUNDING"))

DB_CONFIG = {
    "url": os.getenv("DB_URL"),
    "name": os.getenv("DB_NAME"),
    "collection": os.getenv("DB_COLLECTION_OCEAN_WEATHER")
}



db = Database(
    db_url=DB_CONFIG["url"], 
    db_name=DB_CONFIG["name"], 
    collection_name=DB_CONFIG["collection"]
    )

db_data_all = db.get_all_data(key="time")
db.close_connection()

df_db = pd.DataFrame(db_data_all).drop(columns=["_id"])



In [2]:
def process_dataframe(df: pd.DataFrame, convert_time: bool = False, drop_duplicates: bool = False, reorder: bool = False) -> pd.DataFrame:
    """Converts float columns to float32 and rounds latitude/longitude for consistency."""
    
    if drop_duplicates:
        df = df.drop_duplicates(keep='first')
        
    float_cols = df.select_dtypes(include=["float"]).columns
    df[float_cols] = df[float_cols].astype(np.float32)

    df["latitude"] = df["latitude"].astype(np.float32).round(COORDINATE_ROUNDING)
    df["longitude"] = df["longitude"].astype(np.float32).round(COORDINATE_ROUNDING)

    if convert_time and not np.issubdtype(df['time'].dtype, np.datetime64):
        df["time"] = pd.to_datetime(df["time"]).dt.tz_localize(None).dt.round("h")

    # put time, latitude, and longitude sla columns first
    if reorder:
        cols = ["time", "latitude", "longitude", "sla"]
        df = df[cols + [col for col in df.columns if col not in cols]]

    return df

In [3]:
print(df_db.shape)
df_cleaned = process_dataframe(df_db, convert_time=True, drop_duplicates=True, reorder=True)
df_cleaned = df_cleaned.dropna(axis=1, how='all')
display(df_cleaned.info())

(55718, 57)
<class 'pandas.core.frame.DataFrame'>
Index: 48620 entries, 0 to 55717
Data columns (total 36 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   time                        48620 non-null  datetime64[ns]
 1   latitude                    48620 non-null  float32       
 2   longitude                   48620 non-null  float32       
 3   sla                         48620 non-null  float32       
 4   depth                       48620 non-null  float32       
 5   bottomT                     48620 non-null  float32       
 6   mlotst                      48620 non-null  float32       
 7   siconc                      48620 non-null  float32       
 8   sithick                     48620 non-null  float32       
 9   so                          48620 non-null  float32       
 10  sob                         48620 non-null  float32       
 11  thetao                      48620 non-null  flo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[float_cols] = df[float_cols].astype(np.float32)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["latitude"] = df["latitude"].astype(np.float32).round(COORDINATE_ROUNDING)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["longitude"] = df["longitude"].astype(np.float32).round(COORDINATE_ROUNDI

None

In [4]:
df_cleaned

Unnamed: 0,time,latitude,longitude,sla,depth,bottomT,mlotst,siconc,sithick,so,...,surface_pressure,cloud_cover,cloud_cover_low,cloud_cover_mid,cloud_cover_high,et0_fao_evapotranspiration,vapour_pressure_deficit,wind_speed_10m,wind_direction_10m,wind_gusts_10m
0,2025-01-31 22:00:00,54.360001,10.15,-0.141819,0.501646,4.637166,6.231868,0.0,0.0,16.243200,...,1030.500000,17.0,0.0,0.0,16.0,0.000000,0.060459,11.555103,265.532928,20.519999
1,2025-01-31 22:00:00,54.360001,10.18,-0.138981,0.501646,4.461329,6.234706,0.0,0.0,15.313917,...,1030.371460,17.0,0.0,0.0,16.0,0.000000,0.060434,11.555103,265.532928,20.519999
2,2025-01-31 22:00:00,54.380001,10.18,-0.141014,0.501646,4.306785,6.232673,0.0,0.0,16.512396,...,1030.500000,17.0,0.0,0.0,16.0,0.000000,0.060459,11.555103,265.532928,20.519999
3,2025-01-31 22:00:00,54.380001,10.21,-0.140670,0.501646,4.301152,5.686273,0.0,0.0,17.090414,...,1026.650391,17.0,0.0,0.0,16.0,0.000000,0.059705,11.555103,265.532928,20.519999
4,2025-01-31 22:00:00,54.389999,10.21,-0.141190,0.501646,4.348209,7.014448,0.0,0.0,17.920496,...,1030.500000,17.0,0.0,0.0,16.0,0.000000,0.060459,11.555103,265.532928,20.519999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55713,2025-01-01 00:00:00,54.470001,10.24,-0.721860,0.501646,6.840781,13.984925,0.0,0.0,19.345001,...,1008.000000,100.0,100.0,100.0,100.0,0.018280,0.106229,43.679214,226.836823,75.599998
55714,2025-01-01 00:00:00,54.470001,10.26,-0.717776,0.501646,6.766148,15.540357,0.0,0.0,19.568928,...,1008.000000,100.0,100.0,100.0,100.0,0.018280,0.106229,43.679214,226.836823,75.599998
55715,2025-01-01 00:00:00,54.470001,10.29,-0.712524,0.501646,6.663596,15.826165,0.0,0.0,19.636524,...,1008.000000,100.0,100.0,100.0,100.0,0.018280,0.106229,43.679214,226.836823,75.599998
55716,2025-01-01 00:00:00,54.470001,10.32,-0.706257,0.501646,6.583650,15.764809,0.0,0.0,19.549122,...,1008.000000,100.0,100.0,100.0,100.0,0.018280,0.106229,43.679214,226.836823,75.599998


In [5]:

# fast api
app = fastapi.FastAPI()

@app.get("/")
def read_root():
    return {"Hello": "World"}

@app.get("/ocean_data")
def read_data():
    return df_cleaned.to_dict(orient="records")