In [11]:
import calendar
import datetime
import gzip
import os
import shutil
import zlib
from datetime import datetime as dt

import requests
import rioxarray
import xarray as xr
from tqdm import tqdm

# Function definitions

In [58]:
def build_CHIRPS_filename(query_date, selected_area=None):
    """Build the extension-less file name used for a CHIRPS daily raster.

    Parameters
    ----------
    query_date : datetime.date
        Day the raster refers to (any object exposing ``strftime`` works).
    selected_area : str, optional
        Name of the cropped sub-area. When omitted (``None``) the name of the
        full Africa raster is returned.

    Returns
    -------
    str
        ``CHIRPS_v2.0_Africa_YYYY_MM_DD`` or
        ``CHIRPS_v2.0_Africa_<selected_area>_YYYY_MM_DD``.
    """
    date_part = query_date.strftime('%Y_%m_%d')

    # Identity test: None is a singleton, and `== None` can be fooled by a
    # custom __eq__ on the argument.
    if selected_area is None:
        return "CHIRPS_v2.0_Africa_" + date_part
    return "CHIRPS_v2.0_Africa_" + selected_area + "_" + date_part

In [59]:
def download_CHIRPS_day(query_date, download_path="../data/0_downloads/", timeout=60):
    """Download the CHIRPS v2.0 Africa daily raster for one day.

    The gzipped GeoTIFF is fetched from
    https://data.chc.ucsb.edu/products/CHIRPS-2.0/africa_daily/tifs/p05/<year>/
    (e.g. ``.../2022/chirps-v2.0.2022.01.11.tif.gz``) and written to
    ``download_path`` as ``<build_CHIRPS_filename(query_date)>.tif.gz``.
    When the server answers 404 for the ``.tif.gz`` URL, the same URL without
    the ``.gz`` suffix is tried and the result is saved with a ``.tif``
    extension instead.

    Nothing is downloaded when either of those two files is already present
    in ``download_path``; delete the file to force a fresh download.

    Parameters
    ----------
    query_date : datetime.date
        Day to download.
    download_path : str
        Directory receiving the file; created if missing.
    timeout : float
        Timeout in seconds handed to ``requests.get``.

    Returns
    -------
    None
        Network, HTTP and file-system errors are reported with ``print`` and
        not raised, so a batch run carries on with the next day.
    """
    year = query_date.strftime('%Y')
    gz_url = (
        "https://data.chc.ucsb.edu/products/CHIRPS-2.0/africa_daily/tifs/p05/"
        + year + "/chirps-v2.0." + query_date.strftime('%Y.%m.%d') + ".tif.gz"
    )
    tif_url = gz_url[:-len(".gz")]

    base_name = build_CHIRPS_filename(query_date)
    gz_path = os.path.join(download_path, base_name + ".tif.gz")
    tif_path = os.path.join(download_path, base_name + ".tif")

    # If the file already exists (gzipped or plain), we do not download it again.
    if os.path.isfile(gz_path) or os.path.isfile(tif_path):
        return

    try:
        if download_path:
            os.makedirs(download_path, exist_ok=True)

        response = requests.get(gz_url, timeout=timeout)
        target_path = gz_path
        if response.status_code == 404:
            # hotfix to get images that weren't gzipped during 2021
            print("download : hotfix for bad gzips")
            response = requests.get(tif_url, timeout=timeout)
            target_path = tif_path

        # Never store an HTTP error page as if it were raster data.
        response.raise_for_status()

        with open(target_path, "wb") as out_file:
            out_file.write(response.content)
    except (requests.RequestException, OSError) as exc:
        print("error downloading file", gz_url, exc)

In [60]:
def extract_CHIRPS_data(query_date, origin_path="../data/0_downloads/", dest_path='../data/1_extraction/CHIRPS_v2.0_Africa/'):
    """Decompress one downloaded CHIRPS raster into the extraction folder.

    Reads ``<origin_path>/<name>.tif.gz`` and writes the decompressed content
    to ``<dest_path>/<name>.tif``, where ``<name>`` comes from
    ``build_CHIRPS_filename(query_date)``. If the gzip file is missing or
    cannot be decompressed, ``<origin_path>/<name>.tif`` is copied instead.

    Parameters
    ----------
    query_date : datetime.date
        Day to extract.
    origin_path : str
        Directory holding the downloaded file.
    dest_path : str
        Directory receiving the ``.tif``; created if missing.

    Raises
    ------
    OSError
        When the fallback copy fails as well (for instance
        ``FileNotFoundError`` if no plain ``.tif`` was downloaded either).
    """
    base_name = build_CHIRPS_filename(query_date)
    gz_source = os.path.join(origin_path, base_name + ".tif.gz")
    tif_source = os.path.join(origin_path, base_name + ".tif")
    # os.path.join keeps this correct when dest_path has no trailing separator.
    tif_dest = os.path.join(dest_path, base_name + ".tif")

    if dest_path:
        os.makedirs(dest_path, exist_ok=True)

    try:
        with gzip.open(gz_source, 'rb') as f_in, open(tif_dest, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    except (OSError, EOFError, zlib.error):
        # Missing, truncated or invalid gzip. KeyboardInterrupt and
        # programming errors are deliberately not caught here.
        # hotfix to get images that weren't gzipped during 2021
        print("extraction : hotfix for bad gzips")
        shutil.copyfile(tif_source, tif_dest)

In [61]:
def crop_and_save_CHIRPS_day(query_date, area, selected_area, extraction_path='../data/1_extraction/CHIRPS_v2.0_Africa/', save_path="../data/3_output/"):
    """Clip one extracted CHIRPS raster to a named bounding box and save it.

    Parameters
    ----------
    query_date : datetime.date
        Day whose extracted ``.tif`` is read from ``extraction_path``.
    area : dict
        Maps an area name to four coordinates; the notebook defines them as
        ``[lat NW, lon NW, lat SE, lon SE]``.
    selected_area : str
        Key of ``area`` to clip to; also used in the output folder and file
        names.
    extraction_path : str
        Directory holding the extracted full-Africa raster.
    save_path : str
        Parent directory of the ``CHIRPS_v2.0_Africa_<selected_area>`` output
        folder, which is created when it does not exist.
    """
    source_tif = os.path.join(extraction_path, build_CHIRPS_filename(query_date) + ".tif")
    raster = rioxarray.open_rasterio(source_tif)

    # Index mapping onto the clip box: [0]=maxy, [1]=minx, [2]=miny, [3]=maxx.
    bounds = area[selected_area]
    clipped = raster.rio.clip_box(
        minx=bounds[1],
        miny=bounds[2],
        maxx=bounds[3],
        maxy=bounds[0],
    )

    target_dir = os.path.join(save_path, "CHIRPS_v2.0_Africa_" + selected_area)
    target_name = build_CHIRPS_filename(query_date, selected_area) + ".tif"

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    clipped.rio.to_raster(os.path.join(target_dir, target_name))

In [62]:
def download_CHIRPS_month(query_date, area, selected_area):
    """Download, extract and crop every day of the month of ``query_date``.

    Only the year and month of ``query_date`` are used. Each day is processed
    with ``download_CHIRPS_day``, ``extract_CHIRPS_data`` and
    ``crop_and_save_CHIRPS_day`` using their default paths. Exceptions raised
    by those calls are not caught here.

    Parameters
    ----------
    query_date : datetime.date
        Any date inside the month to process.
    area : dict
        Bounding boxes keyed by area name.
    selected_area : str
        Key of ``area`` to crop to.
    """
    year, month = query_date.year, query_date.month
    _, days_in_month = calendar.monthrange(year, month)
    month_days = [
        datetime.date(year, month, day_number)
        for day_number in range(1, days_in_month + 1)
    ]

    for current_day in tqdm(month_days):
        download_CHIRPS_day(current_day)
        extract_CHIRPS_data(current_day)
        crop_and_save_CHIRPS_day(current_day, area, selected_area)

In [63]:
def download_CHIRPS_year(query_date, area, selected_area):
    """Download, extract and crop every day of the year of ``query_date``.

    Only the year of ``query_date`` is used. Each day from January 1st to
    December 31st is processed with ``download_CHIRPS_day``,
    ``extract_CHIRPS_data`` and ``crop_and_save_CHIRPS_day`` using their
    default paths. A failing day is reported with ``print`` and the loop
    moves on to the next one.

    Parameters
    ----------
    query_date : datetime.date
        Any date inside the year to process.
    area : dict
        Bounding boxes keyed by area name.
    selected_area : str
        Key of ``area`` to crop to.
    """
    year = query_date.year
    first_day = datetime.date(year, 1, 1)
    days_in_year = (datetime.date(year, 12, 31) - first_day).days + 1

    for num_day in tqdm(range(days_in_year)):
        date = first_day + datetime.timedelta(days=num_day)
        try:
            download_CHIRPS_day(date)
            extract_CHIRPS_data(date)
            crop_and_save_CHIRPS_day(date, area, selected_area)
        except Exception as exc:
            # `Exception` (not a bare except) keeps the run interruptible with
            # Ctrl-C / kernel interrupt; the cause is printed instead of hidden.
            print("error with day", num_day, date, exc)

# Run

## Single date

In [64]:
# Bounding box per area, given as [lat NW, lon NW, lat SE, lon SE].
area = {
    'burkina': [16, -6, 9, 3],
    'niger': [23.8, -0.5, 11.3, 15.9],
    'west_africa': [29, -20, 3.5, 26],
}

selected_area = "niger"

# The single day to process.
query_date = datetime.date(year=2021, month=12, day=1)

# Pipeline for one day: download, then extract, then crop and save.
download_CHIRPS_day(query_date)
extract_CHIRPS_data(query_date)
crop_and_save_CHIRPS_day(query_date, area, selected_area)

hotfix for bad gzips
hotfix for bad gzips


## A whole month

In [None]:
# Bounding box per area, given as [lat NW, lon NW, lat SE, lon SE].
area = {
    'burkina': [16, -6, 9, 3],
    'niger': [23.8, -0.5, 11.3, 15.9],
    'west_africa': [29, -20, 3.5, 26],
}

selected_area = "niger"

# Any date inside the target month: this one selects January 2022.
query_date = datetime.date(year=2022, month=1, day=1)

download_CHIRPS_month(query_date, area, selected_area)

## A whole year

In [65]:
# Bounding box per area, given as [lat NW, lon NW, lat SE, lon SE].
area = {
    'burkina': [16, -6, 9, 3],
    'niger': [23.8, -0.5, 11.3, 15.9],
    'west_africa': [29, -20, 3.5, 26],
}

selected_area = "niger"

# Any date inside the target year: this one selects the whole of 2021.
query_date = datetime.date(year=2021, month=1, day=1)

download_CHIRPS_year(query_date, area, selected_area)

 92%|█████████▏| 334/364 [01:20<00:07,  3.80it/s]

hotfix for bad gzips


 92%|█████████▏| 335/364 [01:34<02:02,  4.22s/it]

hotfix for bad gzips
hotfix for bad gzips
hotfix for bad gzips


 92%|█████████▏| 336/364 [01:51<03:49,  8.19s/it]

hotfix for bad gzips


 93%|█████████▎| 337/364 [01:57<03:20,  7.42s/it]

hotfix for bad gzips
hotfix for bad gzips


 93%|█████████▎| 338/364 [02:12<04:13,  9.77s/it]

hotfix for bad gzips
hotfix for bad gzips
hotfix for bad gzips


 93%|█████████▎| 339/364 [02:26<04:33, 10.93s/it]

hotfix for bad gzips


 93%|█████████▎| 340/364 [02:30<03:38,  9.11s/it]

hotfix for bad gzips
hotfix for bad gzips


 94%|█████████▎| 341/364 [02:36<03:05,  8.06s/it]

hotfix for bad gzips
hotfix for bad gzips


 94%|█████████▍| 342/364 [02:42<02:40,  7.32s/it]

hotfix for bad gzips
hotfix for bad gzips


 94%|█████████▍| 343/364 [02:46<02:12,  6.31s/it]

hotfix for bad gzips
hotfix for bad gzips


 95%|█████████▍| 344/364 [03:05<03:26, 10.33s/it]

hotfix for bad gzips
hotfix for bad gzips


 95%|█████████▍| 345/364 [03:17<03:24, 10.75s/it]

hotfix for bad gzips
hotfix for bad gzips


 95%|█████████▌| 346/364 [03:24<02:52,  9.58s/it]

hotfix for bad gzips
hotfix for bad gzips


 95%|█████████▌| 347/364 [03:30<02:25,  8.54s/it]

hotfix for bad gzips
hotfix for bad gzips


 96%|█████████▌| 348/364 [03:44<02:40, 10.06s/it]

hotfix for bad gzips
hotfix for bad gzips


 96%|█████████▌| 349/364 [03:47<01:59,  7.96s/it]

hotfix for bad gzips
hotfix for bad gzips


 96%|█████████▌| 350/364 [03:51<01:37,  6.97s/it]

hotfix for bad gzips
hotfix for bad gzips


 96%|█████████▋| 351/364 [03:56<01:22,  6.33s/it]

hotfix for bad gzips
hotfix for bad gzips


 97%|█████████▋| 352/364 [04:16<02:04, 10.37s/it]

hotfix for bad gzips
hotfix for bad gzips


 97%|█████████▋| 353/364 [04:19<01:31,  8.32s/it]

hotfix for bad gzips
hotfix for bad gzips
hotfix for bad gzips


 97%|█████████▋| 354/364 [04:38<01:52, 11.29s/it]

hotfix for bad gzips
hotfix for bad gzips


 98%|█████████▊| 355/364 [04:41<01:19,  8.82s/it]

hotfix for bad gzips


 98%|█████████▊| 356/364 [04:46<01:02,  7.82s/it]

hotfix for bad gzips
hotfix for bad gzips


 98%|█████████▊| 357/364 [05:00<01:07,  9.68s/it]

hotfix for bad gzips
hotfix for bad gzips
hotfix for bad gzips


 98%|█████████▊| 358/364 [05:14<01:05, 10.94s/it]

hotfix for bad gzips


 99%|█████████▊| 359/364 [05:29<01:01, 12.22s/it]

hotfix for bad gzips
hotfix for bad gzips


 99%|█████████▉| 360/364 [05:39<00:45, 11.33s/it]

hotfix for bad gzips
hotfix for bad gzips


 99%|█████████▉| 361/364 [05:42<00:26,  8.92s/it]

hotfix for bad gzips
hotfix for bad gzips
hotfix for bad gzips


 99%|█████████▉| 362/364 [05:45<00:14,  7.19s/it]

hotfix for bad gzips


100%|█████████▉| 363/364 [05:59<00:09,  9.16s/it]

hotfix for bad gzips
hotfix for bad gzips


100%|██████████| 364/364 [06:08<00:00,  9.15s/it]

hotfix for bad gzips


100%|██████████| 364/364 [06:08<00:00,  1.01s/it]


## Multiple years

In [None]:
# Bounding box per area, given as [lat NW, lon NW, lat SE, lon SE].
area = {
    'burkina': [16, -6, 9, 3],
    'niger': [23.8, -0.5, 11.3, 15.9],
    'west_africa': [29, -20, 3.5, 26],
}

selected_area = "niger"

# range() excludes its end value, so this processes 2019, 2020 and 2021.
for year in range(2019, 2022):
    # January 1st stands for the whole year.
    query_date = datetime.date(year=year, month=1, day=1)
    download_CHIRPS_year(query_date, area, selected_area)