In [None]:
import os.path
import requests
import zipfile

# Registry of datasets to fetch, keyed by a human-readable name.
# Each entry provides:
#   'filename' - name of the local file the download is stored under
#   'url'      - address the file is downloaded from
#   'zip'      - optional; True when the downloaded file is a zip archive
#                that should be extracted after download
datasets = {
    'Breast Cancer Wisconsin (Diagnostic) Data Set': {
        'filename': 'wdbc.data',
        # Use https like the Iris entry on the same host, so the file is not
        # transferred in cleartext.
        'url': 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
    },
    'Iris Data Set': {
        'filename': 'iris.data',
        'url': 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    },
    'Economic Freedom of the World Data': {
        'filename': 'efw.xlsx',
        'url': 'https://www.fraserinstitute.org/sites/default/files/efw-2020-master-index-data-for-researchers.xlsx',
    },
    'ATP Players': {
        'filename': 'atp_players.csv',
        'url': 'https://github.com/JeffSackmann/tennis_atp/raw/master/atp_players.csv',
    },
    'ATP Rankings 2000s': {
        'filename': 'atp_rankings_00s.csv',
        'url': 'https://github.com/JeffSackmann/tennis_atp/raw/master/atp_rankings_00s.csv',
    },
    'ATP Rankings 2010s': {
        'filename': 'atp_rankings_10s.csv',
        'url': 'https://github.com/JeffSackmann/tennis_atp/raw/master/atp_rankings_10s.csv',
    },
    'Triton Filesizes': {
        'filename': 'filesizes_timestamps.txt',
        'url': 'https://raw.githubusercontent.com/AaltoSciComp/triton-fs-stats/master/filesizes_timestamps/filesizes_timestamps.txt',
    },
    'England football': {
        'filename': 'england_football.zip',
        'url': 'https://github.com/footballcsv/england/archive/master.zip',
        'zip': True,
    },
}

def download_dataset(url, filepath, timeout=60):
    """Download ``url`` and save the response body to ``filepath``.

    Args:
        url: Address of the file, fetched with an HTTP GET request.
        filepath: Destination path. Its parent directory is created when it
            does not exist yet.
        timeout: Value passed as ``timeout`` to ``requests.get`` (seconds),
            so that an unresponsive server cannot block the script forever.

    Raises:
        RuntimeError: If the response status code is anything other than 200.
        requests.exceptions.RequestException: If the request itself fails
            (for example a connection error or a timeout).
    """
    data_request = requests.get(url, timeout=timeout)
    if data_request.status_code != 200:
        raise RuntimeError(
            'Failure at downloading! (HTTP %s for %s)'
            % (data_request.status_code, url)
        )
    # On a fresh checkout the target directory may be missing, in which case
    # open() below would fail with FileNotFoundError.
    target_dir = os.path.dirname(filepath)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    print('Saving dataset to: %s' % filepath)
    with open(filepath, 'wb') as datafile:
        datafile.write(data_request.content)

def extract_zip(filename, extract_path):
    """Extract every member of the zip archive ``filename`` into ``extract_path``.

    Args:
        filename: Path of the zip archive to read.
        extract_path: Directory the archive members are extracted into.

    Raises:
        zipfile.BadZipFile: If ``filename`` is not a valid zip archive.
    """
    # The context manager closes the archive's file handle even when the
    # extraction raises, instead of leaving it open until garbage collection.
    with zipfile.ZipFile(filename) as datafile:
        datafile.extractall(path=extract_path)
    

# Walk the dataset registry: download each file that is not already present
# under data/, then unpack the entries flagged as zip archives.
for dataset_name, dataset_info in datasets.items():
    filename, url = dataset_info['filename'], dataset_info['url']
    filepath = os.path.join('data', filename)

    # Files already on disk are reused rather than downloaded again.
    if not os.path.isfile(filepath):
        print('Downloading dataset: %s' % dataset_name)
        download_dataset(url, filepath)

    # Only entries marked with 'zip' need the extraction step.
    if not dataset_info.get('zip', False):
        continue

    print('Extracting dataset: %s' % dataset_name)
    # An entry may name its own extraction directory; the default is data/.
    extract_path = dataset_info.get('extract_path', 'data')
    extract_zip(filepath, extract_path)