In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import requests

# Set the matplotlib style sheet used for figures in this notebook
plt.style.use("Solarize_Light2")

# Variables

In [None]:
# Coordinates of the point of interest, sent as latitude/longitude in the API requests
TALAGANTE_LAT, TALAGANTE_LON = -33.714913, -70.957909

# API Flood Data

In [None]:
base_url_flood = "https://flood-api.open-meteo.com/v1/flood"

# Daily river discharge at the Talagante coordinates for the full study period
params_flood = {
    "latitude": TALAGANTE_LAT,
    "longitude": TALAGANTE_LON,
    "daily": "river_discharge",
    "start_date": "1984-01-01",
    "end_date": "2023-09-05",
    "models": "seamless_v4"
}

# The timeout keeps the cell from hanging forever on a stalled connection
response_flood = requests.get(base_url_flood, params=params_flood, timeout=120)
# Fail here with the HTTP status instead of later with a KeyError on 'daily'
response_flood.raise_for_status()

data_flood = response_flood.json()

# Display everything except the 'daily' series: rendering decades of daily
# values would bloat the notebook output without being readable
{key: value for key, value in data_flood.items() if key != 'daily'}

In [None]:
# List which series the 'daily' section of the flood response contains
data_flood["daily"].keys()

In [None]:
# Create the flood frame with its two target columns; the next cell assigns the values.
# NOTE(review): the requested columns do not look like top-level keys of the
# response, so this presumably yields an empty frame - confirm.
df_flood = pd.DataFrame(
    data_flood,
    columns=['date', 'river_discharge'],
)
df_flood

In [None]:
# Copy the daily timestamps and discharge values out of the API payload,
# then use the date column as the index
df_flood = df_flood.assign(
    date=data_flood['daily']['time'],
    river_discharge=data_flood['daily']['river_discharge'],
).set_index('date')
df_flood

In [None]:
# Fraction of missing values per column
df_flood.isna().mean()

In [None]:
# Show only the rows in which at least one column is missing
df_flood.loc[df_flood.isna().any(axis='columns')]

In [None]:
# df_flood.loc['2016-04-01':'2016-04-30']

In [None]:
# checking days around the missing values
# df_flood.loc['2023-08-01':'2023-08-07']

In [None]:
# dropping missing values
# df_flood.dropna(inplace=True)

In [None]:
# Preview the first five rows of the flood frame
df_flood.head()

# API Weather Data

In [None]:
base_url_weather = "https://archive-api.open-meteo.com/v1/archive"

# Hourly weather variables at the same coordinates and period as the flood data
params_weather = {
    "latitude": TALAGANTE_LAT,
    "longitude": TALAGANTE_LON,
    "start_date": "1984-01-01",
    "end_date": "2023-09-05",
    "hourly": "temperature_2m,rain,surface_pressure,windspeed_10m,winddirection_10m,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,shortwave_radiation",
    "timezone": "auto"
}

# Generous timeout: decades of hourly data make for a large response, but the
# cell should still not hang forever on a stalled connection
response_weather = requests.get(base_url_weather, params=params_weather, timeout=300)
# Fail here with the HTTP status instead of later with a KeyError on 'hourly'
response_weather.raise_for_status()

data_weather = response_weather.json()

# Display everything except the 'hourly' series: rendering every hourly value
# of eight variables would bloat the notebook output without being readable
{key: value for key, value in data_weather.items() if key != 'hourly'}

In [None]:
# List which series the 'hourly' section of the weather response contains
data_weather["hourly"].keys()

In [None]:
df_weather = pd.DataFrame(data= data_weather, columns= ['date', 'temperature', 'rain', 'surface_pressure', 'windspeed', 'winddirection', 'oil_moisture_0_to_7cm', 'soil_moisture_7_to_28cm', 'radiation'])
df_weather

In [None]:
df_weather['date'] = data_weather['hourly']['time']
df_weather['temperature'] = data_weather['hourly']['temperature_2m']
df_weather['rain'] = data_weather['hourly']['rain']
df_weather['surface_pressure'] = data_weather['hourly']['surface_pressure']
df_weather['windspeed'] = data_weather['hourly']['windspeed_10m']
df_weather['winddirection'] = data_weather['hourly']['winddirection_10m']
df_weather['oil_moisture_0_to_7cm'] = data_weather['hourly']['soil_moisture_0_to_7cm']
df_weather['soil_moisture_7_to_28cm'] = data_weather['hourly']['soil_moisture_7_to_28cm']
df_weather['radiation'] = data_weather['hourly']['shortwave_radiation']
df_weather.set_index('date', inplace=True)
df_weather

In [None]:
# Fraction of missing values per column (0.0 = none missing, 1.0 = all missing)
df_weather.isna().mean()

In [None]:
# Show only the rows in which at least one column is missing
df_weather.loc[df_weather.isna().any(axis='columns')]

In [None]:
# Keep only the rows that have at least 5 non-missing values.
# `thresh` counts non-NA values per row, so rows with fewer than 5 are dropped.
df_weather = df_weather.dropna(thresh=5)

In [None]:
# Inspect the rows from 2023-08-29 21:00 up to the end of the requested range
df_weather.loc[slice('2023-08-29T21:00', '2023-09-05T23:00')]

In [None]:
# dropping rows with missing values
# df_weather.dropna(inplace=True)
# df_weather[df_weather.isnull().any(axis=1)]

In [None]:
# Preview the first five rows of the weather frame
df_weather.head()

# Merging Weather and Flood Data

In [None]:
# Convert both indexes to datetimes so the two frames can be aligned on time
df_flood.index = pd.to_datetime(df_flood.index)
df_weather.index = pd.to_datetime(df_weather.index)

In [None]:
# Upsample the flood data to hourly resolution: every hour of a day gets that
# day's river_discharge value, so the frame lines up with the hourly weather index.
# The hourly index is built explicitly up to 23:00 of the last day; a plain
# resample(...).ffill() stops at the last daily timestamp and would leave the
# remaining hours of the final day without a value.
hourly_index = pd.date_range(
    start=df_flood.index.min(),
    end=df_flood.index.max().normalize() + pd.Timedelta(hours=23),
    freq='h',  # lowercase alias; the uppercase 'H' is deprecated in recent pandas
    name=df_flood.index.name,
)
df_flood = df_flood.sort_index().reindex(hourly_index, method='ffill')
df_flood

In [None]:
# Left join on the timestamp index: keep every weather row and attach the
# river discharge value found at the same timestamp
df = df_weather.merge(df_flood, how='left', left_index=True, right_index=True)
df

In [None]:
# Pairwise Pearson correlation between all columns of the merged frame
df.corr(method='pearson')

In [None]:
# Rank the rows by river discharge, highest first, for inspection only.
# A sorted copy is displayed instead of sorting `df` in place, so the merged
# frame keeps its order for the cells that follow.
df.sort_values(by='river_discharge', ascending=False)

# Saving Dataset to CSV

In [None]:
# Make sure the rows are in chronological order before persisting
df = df.sort_index()

# to_csv does not create missing directories, so make sure data/ exists first
output_path = Path('data/raw_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)

# Save the merged weather + flood frame, including its timestamp index
df.to_csv(output_path)