In [2]:
import fastapi
import pandas as pd
import numpy as np
from utils.Database import Database


DB_CONFIG = {
    "url": "localhost",
    "name": "deep-learning",
    "collection": "test"
}

db = Database(
    db_url=DB_CONFIG["url"], 
    db_name=DB_CONFIG["name"], 
    collection_name=DB_CONFIG["collection"]
    )

db_data_all = db.get_all_data(key="time")
db.close_connection()

df_db = pd.DataFrame(db_data_all).drop(columns=["_id"])



In [3]:
def process_dataframe(df: pd.DataFrame, convert_time: bool = False, drop_duplicates: bool = False, reorder: bool = False) -> pd.DataFrame:
    """Converts float columns to float32 and rounds latitude/longitude for consistency."""
    
    if drop_duplicates:
        df = df.drop_duplicates(keep='first')
        
    float_cols = df.select_dtypes(include=["float"]).columns
    df[float_cols] = df[float_cols].astype(np.float32)

    df["latitude"] = df["latitude"].astype(np.float32).round(6)
    df["longitude"] = df["longitude"].astype(np.float32).round(6)

    if convert_time and not np.issubdtype(df['time'].dtype, np.datetime64):
        df["time"] = pd.to_datetime(df["time"])

    # put time, latitude, and longitude sla columns first
    if reorder:
        cols = ["time", "latitude", "longitude", "sla"]
        df = df[cols + [col for col in df.columns if col not in cols]]

    return df

In [4]:
print(df_db.shape)
df_cleaned = process_dataframe(df_db, convert_time=True, drop_duplicates=True, reorder=True)
df_cleaned = df_cleaned.dropna(axis=1, how='all')
display(df_cleaned.info())

(24420, 57)
<class 'pandas.core.frame.DataFrame'>
Index: 11850 entries, 0 to 24416
Data columns (total 36 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   time                        11850 non-null  datetime64[ns]
 1   latitude                    11850 non-null  float32       
 2   longitude                   11850 non-null  float32       
 3   sla                         11850 non-null  float32       
 4   dew_point_2m                11850 non-null  float32       
 5   cloud_cover_low             11850 non-null  float32       
 6   bottomT                     11850 non-null  float32       
 7   rain                        11850 non-null  float32       
 8   vo                          11850 non-null  float32       
 9   cloud_cover                 11850 non-null  float32       
 10  vapour_pressure_deficit     11850 non-null  float32       
 11  apparent_temperature        11850 non-null  flo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[float_cols] = df[float_cols].astype(np.float32)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["latitude"] = df["latitude"].astype(np.float32).round(6)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["longitude"] = df["longitude"].astype(np.float32).round(6)


None

In [5]:
df_cleaned

Unnamed: 0,time,latitude,longitude,sla,dew_point_2m,cloud_cover_low,bottomT,rain,vo,cloud_cover,...,snowfall,et0_fao_evapotranspiration,cloud_cover_high,wind_speed_10m,wo,sob,relative_humidity_2m,weather_code,siconc,temperature_2m
0,2025-01-09 22:00:00,54.358276,10.152689,0.161197,-0.9635,5.0,4.576253,0.0,0.000000,56.0,...,0.0,0.00000,0.0,9.914717,8.869783e-07,12.287620,94.681297,2.0,0.0,-0.2135
1,2025-01-09 22:00:00,54.374943,10.180467,0.160010,-0.9635,5.0,4.348308,0.0,0.055597,56.0,...,0.0,0.00000,0.0,9.914717,9.840887e-07,17.846630,94.681297,2.0,0.0,-0.2135
2,2025-01-09 22:00:00,54.424946,10.180467,0.146627,-0.8350,5.0,4.960179,0.0,0.005438,78.0,...,0.0,0.00000,0.0,10.621525,1.745254e-06,19.396133,92.986153,2.0,0.0,0.1650
3,2025-01-09 22:00:00,54.441608,10.263800,0.149021,-0.8350,5.0,5.145225,0.0,-0.001097,78.0,...,0.0,0.00000,0.0,10.621525,2.571443e-06,19.804125,92.986153,2.0,0.0,0.1650
4,2025-01-09 22:00:00,54.474941,10.347133,0.151499,0.7500,2.0,5.326340,0.0,-0.031450,65.0,...,0.0,0.00850,0.0,18.297245,2.033797e-06,20.148500,85.761215,2.0,0.0,2.9000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24400,2025-01-01 00:00:00,54.441608,10.263800,-0.728558,4.5650,100.0,6.708191,0.6,0.016994,100.0,...,0.0,0.01828,100.0,43.679214,1.552433e-05,19.667498,88.872421,53.0,0.0,6.2650
24405,2025-01-01 00:00:00,54.458275,10.263800,-0.723298,4.5650,100.0,6.722604,0.6,0.004606,100.0,...,0.0,0.01828,100.0,43.679214,1.895358e-05,19.667965,88.872421,53.0,0.0,6.2650
24407,2025-01-01 00:00:00,54.458275,10.319355,-0.708619,4.5650,100.0,6.555498,0.6,0.019550,100.0,...,0.0,0.01828,100.0,43.679214,1.613287e-05,19.233673,88.872421,53.0,0.0,6.2650
24409,2025-01-01 00:00:00,54.474941,10.208244,-0.727748,4.5650,100.0,6.864717,0.6,-0.015680,100.0,...,0.0,0.01828,100.0,43.679214,2.608039e-05,19.250330,88.872421,53.0,0.0,6.2650


In [None]:

# fast api
app = fastapi.FastAPI()

@app.get("/")
def read_root():
    return {"Hello": "World"}

@app.get("/ocean_data")
def read_data():
    return df_cleaned.to_dict(orient="records")

11