In [2]:
# API
import requests
import json

# General
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

### Synthetic weather forecast with Gaussian Noise
Since I do not have easy access to weather forecast data I am going to synthetically generate some precipitation forecasts from real past measurements. The setup will be as follows:
- take past precipitation data in 6 day chunks
- day 0 is today, days 1-5 are the days to be turned into forecasts
- add additive Gaussian Noise to days 1-5, simulating the increasing uncertainty in the forecast
- Additional conditions:
    - precipitation is truncated at 0 mm
    - the error is not symmetric, eg. over shooting happens more often for small precipitation values, while under-
      shooting occurs for outliers
    - so the distribution from which we add the noise is conditional on the length of the forecast and the observed actual value. Observed values of 0 mm have a very small chance of getting some noise added. Small/usual precipitation values have a right-skewed distribution with center on their actual value and variance linked to the length of the forecast. And large/outlier precipitation values have a left-skewed distribution with variance likewise increasing with length of the forecast

In [3]:
# Load and preprocess data
refEt = pd.read_csv("./Data/fourierEt.csv")
weather = pd.read_csv("./Data/weather.csv")
weather["refEt"] = refEt
weather["date"] = pd.to_datetime(weather["date"], format= "%Y-%m-%d")

# Get data for a given year + format to AquaCrop specifications
def getYear(weather, year):
    weather = weather.loc[pd.DatetimeIndex(weather.date).year == year]
    
    weather = weather.loc[:, ("temp_min", "temp_max", "rain", "refEt", "date")]
    weather.columns = ["MinTemp", "MaxTemp", "Precipitation", "ReferenceET", "Date"]
    weather.reset_index(drop = True, inplace = True)
    
    return weather

weather = getYear(weather, 2010)
weather

Unnamed: 0,MinTemp,MaxTemp,Precipitation,ReferenceET,Date
0,22.2,32.3,13.4,4.03,2010-01-01
1,16.3,23.3,21.8,5.00,2010-01-02
2,12.6,23.2,0.0,6.53,2010-01-03
3,12.7,28.9,0.0,6.48,2010-01-04
4,14.8,32.6,0.0,7.80,2010-01-05
...,...,...,...,...,...
360,10.5,18.0,0.0,5.41,2010-12-27
361,8.5,24.0,0.0,6.04,2010-12-28
362,13.0,28.9,0.0,6.68,2010-12-29
363,14.9,30.8,0.0,7.03,2010-12-30


In [120]:
# Extract precipitation data in 5 day chunks
forecastP = np.zeros((len(weather) - 5, 5))
for i in range(len(weather) - 5):
    forecastP[i] = weather.loc[i+1:i+5, "Precipitation"] 
    
forecastP = pd.DataFrame(forecastP, columns = [f"Day{i}" for i in range(1, 6)])
forecastP

Unnamed: 0,Day1,Day2,Day3,Day4,Day5
0,21.8,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...
355,0.0,0.0,0.0,0.3,0.0
356,0.0,0.0,0.3,0.0,0.0
357,0.0,0.3,0.0,0.0,0.0
358,0.3,0.0,0.0,0.0,0.0


In [121]:
# Summary statistics
meanP = weather.Precipitation.mean()
stdP = weather.Precipitation.std()
q90p = weather.Precipitation.quantile(0.90)
q95p = weather.Precipitation.quantile(0.95)
maxP = weather.Precipitation.max()

In [122]:
# Add noise to make synthetic forecast data
# For simplicity I just use uniforms of different intervals

for i in range(forecastP.shape[0]):
    for j in range(forecastP.shape[1]):
        val = forecastP.iloc[i, j]
        
        # if value is 0, add small positive (up to mean preipitation) noise in 20% of cases
        if val == 0:
            if np.random.random() >= 0.8:
                noise = np.random.uniform(meanP * 0.2 * (j + 1))
                forecastP.iloc[i, j] += noise
                
        # if value is > 0 but < mean, add small noise that may also be negative but capped at 0
        elif val > 0 and val <= meanP:
            noise = np.random.uniform(-meanP/2, meanP * 0.5 * (j + 1))
            if forecastP.iloc[i, j] + noise > 0:
                forecastP.iloc[i, j] += noise
            else:
                forecastP.iloc[i, j] == 0
            
        # if value is > mean and under < 90th quantile, add noise that can be positive or negative but never result in 0
        elif val > meanP and val < q90p:
            noise = np.random.uniform(-1/5 * j * val, stdP * 0.1 * (j + 1))
            forecastP.iloc[i, j] += noise
            
        # if value > 90th quantile, add mostly negative noise
        elif val > q90p:
            noise = np.random.uniform(-1/5 * (j + 1) * val, 1/10 * val * (j + 1))
            forecastP.iloc[i, j] += noise
            
forecastP.to_csv("./Data/forecastP.csv")
forecastP

Unnamed: 0,Day1,Day2,Day3,Day4,Day5
0,20.109620,0.000000,0.000000,0.000000,0.000000
1,0.814681,1.015133,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.000000,0.000000,1.151283
3,0.000000,0.000000,0.000000,0.000000,1.490826
4,0.000000,0.000000,1.459916,0.000000,0.000000
...,...,...,...,...,...
355,0.000000,0.000000,1.326370,4.434016,0.000000
356,0.899526,0.000000,1.559760,1.380557,1.610326
357,0.000000,2.970673,0.000000,0.000000,0.000000
358,0.300000,0.000000,0.000000,0.000000,0.000000
