In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt

# from osgeo import gdal
from os.path import join

INPUT_FOLDER = "D:/Donnees/Others/geographic/bangladesh"

## Simulation de l'historique des inondation

Dans cette cellule, nous allons coder une petite simulation qui va générer des données factices d'inondation. Cette simulation va calculer une série temporelle des inondations frappant les écoles.

In [2]:
school_count = 1000
x_center = 100
y_center = 25
x_width = 10
y_width = 5

filepath = join(INPUT_FOLDER, "dummy_schools_1.csv")

schools = pd.DataFrame(
    index=pd.Index(name="school", data=[f"school {i}" for i in range(school_count)]),
    data={
        "x": np.random.rand(school_count),
        "y": np.random.rand(school_count)
    }
)
years = np.arange(2020, 2101, dtype=int)

serie = pd.DataFrame()
tm = 0.0
tp = 0.0
for year in years:
    tp = (year - years.min()) / (years.max() - years.min())
    
    impacted = schools.loc[(schools["y"] >= tm) & (schools["y"] < tp), :].copy()
    impacted["year"] = year
    
    serie = pd.concat((serie, impacted.reset_index()), axis=0)
    tm = tp

# post-processing
serie["date"] = serie["year"].apply(lambda x: dt.date(year=x, month=1, day=1))
serie["x"] = x_center + x_width * (serie["x"] - 0.5)
serie["y"] = y_center + y_width * (serie["y"] - 0.5)
serie = serie.sort_values(by=["year", "school"], ascending=True)[["year", "school", "x", "y"]]

serie.to_csv(filepath, sep=",", header=True, index=False)
serie.info()
serie.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 1 to 14
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    1000 non-null   int32  
 1   school  1000 non-null   object 
 2   x       1000 non-null   float64
 3   y       1000 non-null   float64
dtypes: float64(2), int32(1), object(1)
memory usage: 35.2+ KB


Unnamed: 0,year,school,x,y
1,2021,school 111,96.458433,22.521023
0,2021,school 22,98.024033,22.501135
2,2021,school 303,103.366128,22.557286
3,2021,school 330,103.679999,22.534679
4,2021,school 393,98.55878,22.547319


The second simulation is based from the schools coordinates. It takes a sample of the set of schools and assigns them randomly to a flood date. This allows for more consistent beta-testing in the QGIS temporal animation.

In [3]:
schools = pd.read_csv(join(INPUT_FOLDER, "schools.csv"), header=0, sep=";", decimal=".")

schools = (
    schools
    .where(schools["x_clean"] != "#VALEUR!")
    .where(schools["y_clean"] != "#VALEUR!")
    .dropna()
    .applymap(lambda x: x.replace("(", "").replace(")", ""))
    .astype(float)
    .sample(frac=0.1)
)

schools["date"] = (
    pd
    .date_range(
        name="date",
        start=dt.datetime(2020, 1, 1),
        end=dt.datetime(2100, 1, 1),
        periods=len(schools)
    )
    .to_frame()
    .values
)
schools["year"] = schools["date"].dt.year
schools["school"] = [f"school {i}" for i in range(len(schools))]

schools = (
    schools
    .sort_values(by=["year", "school"])
    .rename(columns={"x_clean": "x", "y_clean": "y"})
    [["year", "school", "x", "y"]]
)

schools.to_csv(join(INPUT_FOLDER, "dummy_school_flood.csv"), sep=",", header=True, index=False)
schools.info()
schools

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7797 entries, 25849 to 73295
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    7797 non-null   int64  
 1   school  7797 non-null   object 
 2   x       7797 non-null   float64
 3   y       7797 non-null   float64
dtypes: float64(2), int64(1), object(1)
memory usage: 304.6+ KB


Unnamed: 0,year,school,x,y
25849,2020,school 0,90.279996,23.028056
28100,2020,school 1,90.505877,23.512365
57167,2020,school 10,88.426844,24.754573
46378,2020,school 11,89.092474,23.515667
56939,2020,school 12,89.113764,24.629591
...,...,...,...,...
36664,2099,school 7792,90.709045,24.891438
8905,2099,school 7793,91.910988,22.427717
27195,2099,school 7794,89.925313,23.795063
65735,2099,school 7795,89.286343,25.149974
