# Download precipitation data from the Polish Institute of Meteorology and Water Management - National Research Institute

The aim of this notebook is to download data from the website of the Polish Institute of Meteorology and process it in order to obtain a pandas DataFrame with monthly precipitation data from 1951 to the present.

Data source: https://dane.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_meteorologiczne/miesieczne/opad/



In [9]:
import io
import re
import zipfile
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Root of the IMGW directory listing that holds the monthly precipitation archives
url = 'https://dane.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_meteorologiczne/miesieczne/opad/'
# Folder (relative to the working directory) that receives the extracted files
target_data_folder = 'Data'

In [10]:
# 1. Parse a site with meteorological data
def get_html_of_a_meteo_site(url, timeout=30):
    """Download a page and return it parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without it a stalled connection would block
            the notebook indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'html.parser'`` backend.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (4xx/5xx).
        requests.exceptions.RequestException: On connection problems or
            when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parse an error page as if it were the listing,
    # which would silently yield an empty list of links downstream.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')
    
# 2. Extract a list of folders
def make_data_link_list(site_html):
    """Collect the ``href`` values of all year-like links on a parsed page.

    A link qualifies when its ``href`` contains a four-digit number,
    optionally followed by ``_`` and another four-digit number
    (e.g. ``1950_1955/``, ``2001/``, ``1996_2000_m_o.zip``). The pattern is
    not anchored, so it may match anywhere inside the ``href``.

    Args:
        site_html: Parsed page exposing ``find_all`` (a ``BeautifulSoup``
            object in this notebook).

    Returns:
        A list of ``href`` strings in document order; empty when nothing
        matches.
    """
    # Raw string keeps "\d" from being treated as a (invalid) string escape,
    # and "(?:...)" is a proper non-capturing group for the optional
    # "_YYYY" suffix.
    year_pattern = re.compile(r"\d{4}(?:_\d{4})?")
    return [link.get("href") for link in site_html.find_all('a', href=year_pattern)]


# 3. Download zip files and extract them
def download_and_extract_zip(file_url, target_folder, timeout=60):
    """Download a zip archive and unpack it into ``target_folder``.

    The archive is held in memory only; nothing but its extracted members
    is written to disk.

    Args:
        file_url: Address of the zip file.
        target_folder: Directory that receives the extracted members.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get`` so a stalled connection cannot hang the
            notebook.

    Returns:
        ``True`` when the archive was downloaded and extracted, ``False``
        when the server did not answer with status 200 (nothing is
        extracted in that case).

    Raises:
        zipfile.BadZipFile: If the downloaded content is not a valid zip.
        requests.exceptions.RequestException: On connection problems or
            when the timeout expires.
    """
    response = requests.get(file_url, timeout=timeout)
    if response.status_code != 200:
        # Same best-effort skip as before, but now visible to the caller.
        return False
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall(target_folder)
    return True

        
# 4. Iterate through all folders and download files
def walk_folders_and_get_meteo_files(url, target_data_folder):
    """Download and extract every archive linked from the listing at ``url``.

    The listing page is scanned for year-like links, each of which is
    opened as a sub-listing; every year-like link found there is passed to
    ``download_and_extract_zip``. Progress is reported with ``print``.

    Args:
        url: Address of the top-level directory listing. A missing trailing
            slash is tolerated.
        target_data_folder: Directory that receives the extracted files.
    """
    # urljoin treats the last path segment of the base as a file name unless
    # the base ends with "/", so normalise it once up front.
    base_url = url if url.endswith('/') else url + '/'
    site_html = get_html_of_a_meteo_site(base_url)
    for folder_link in make_data_link_list(site_html):
        print(f'Files from from {folder_link} are ready to be downloaded.')
        # urljoin resolves relative and absolute hrefs alike, unlike plain
        # string concatenation.
        folder_url = urljoin(base_url, folder_link)
        site_html_with_files = get_html_of_a_meteo_site(folder_url)
        for file_link in make_data_link_list(site_html_with_files):
            download_and_extract_zip(urljoin(folder_url, file_link), target_data_folder)
            # NOTE(review): the helper's result is not checked here, so this
            # message is printed even if the file was skipped.
            print(f'File: {file_link} has been downloaded.')
    print('All data available had been downloaded')
    

In [11]:
# Fetch every archive linked under `url` and extract it into `target_data_folder`
walk_folders_and_get_meteo_files(url, target_data_folder)

Files from from 1950_1955/ are ready to be downloaded.
File: 1950_1955_m_o.zip has been downloaded.
Files from from 1956_1960/ are ready to be downloaded.
File: 1956_1960_m_o.zip has been downloaded.
Files from from 1961_1965/ are ready to be downloaded.
File: 1961_1965_m_o.zip has been downloaded.
Files from from 1966_1970/ are ready to be downloaded.
File: 1966_1970_m_o.zip has been downloaded.
Files from from 1971_1975/ are ready to be downloaded.
File: 1971_1975_m_o.zip has been downloaded.
Files from from 1976_1980/ are ready to be downloaded.
File: 1976_1980_m_o.zip has been downloaded.
Files from from 1981_1985/ are ready to be downloaded.
File: 1981_1985_m_o.zip has been downloaded.
Files from from 1986_1990/ are ready to be downloaded.
File: 1986_1990_m_o.zip has been downloaded.
Files from from 1991_1995/ are ready to be downloaded.
File: 1991_1995_m_o.zip has been downloaded.
Files from from 1996_2000/ are ready to be downloaded.
File: 1996_2000_m_o.zip has been downloaded.
