In [25]:
import logging.config
import os
import re

import requests
import boto3
import botocore
import pandas as pd
from botocore.exceptions import ClientError
from zipfile import ZipFile

In [26]:

def get_zip(url, file_name):
    '''Downloads a file over HTTP and writes it to disk.

    Failures are reported on stdout instead of being raised, so a caller
    cannot tell success from failure through the return value.

    Args:
        url (str): the url to the file to be downloaded
        file_name (str): the location and name of file that will be downloaded as path
    Returns:
        None
    '''
    try:
        # Issue the request *before* opening the target so a failed request
        # never leaves an empty file behind at `file_name`.
        with requests.get(url, timeout=30, stream=True) as response:
            # Without this, a 4xx/5xx error page would be saved as the "zip".
            response.raise_for_status()
            with open(file_name, "wb") as f:
                # Stream to disk in chunks rather than buffering the whole body.
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        print('Zip file successfully downloaded from source, placed in %s' % file_name)
    except requests.HTTPError as err:
        print('Could not download: HTTP error (%s)' % err)
    except requests.ConnectionError:
        print('Could not download: Connection error')
    except requests.Timeout:
        print('Could not download: Timeout error')
    except Exception as err:
        # Deliberately best-effort: report the cause instead of raising.
        print('General: File was unable to be downloaded from source location (%s)' % err)


def unzip(source_path, destination_path, data_filename):
    '''Extracts a single member from a zip archive.

    Failures (missing archive, file that is not a zip, member not present in
    the archive, ...) are reported on stdout together with the underlying
    error instead of being raised.

    Args:
        source_path (str): the filepath of the zip file
        destination_path (str): the directory to unzip file to
        data_filename (str): the file within the zip file to extract
    Returns:
        None
    '''
    try:
        with ZipFile(source_path, 'r') as archive:
            archive.extract(data_filename, destination_path)
        print('File successfully unzipped and extracted, located at %s' % destination_path)
    except Exception as err:
        # Deliberately best-effort, but surface the reason so the failure
        # can be diagnosed from the message alone.
        print('File %s was not able to be unzipped: %s' % (source_path, err))


In [36]:
# Fetch the zipped archive from filebin into the local check_download folder.
get_zip(
    url='https://filebin.net/archive/tniv4fxc1k45dyrv/zip',
    file_name='check_download/stockwatcher.zip',
)

Zip file successfully downloaded from source, placed in %s check_download/stockwatcher.zip


In [41]:
import urllib.request

# Download the CSV directly; the cell displays the (local path, headers)
# tuple that urlretrieve returns.
urllib.request.urlretrieve(
    url="https://filebin.net/tniv4fxc1k45dyrv/recent_transactions.csv",
    filename="check_download/stockwatcher.csv",
)

('check_download/stockwatcher.csv',
 <http.client.HTTPMessage at 0x7fc0e05fbf10>)

In [47]:
# Save whatever the fastupload.io link serves as stockwatcher.zip.
# NOTE(review): the later extractall on this file is recorded as failing with
# BadZipFile, so this URL may not be serving a zip archive -- confirm.
urllib.request.urlretrieve(
    url="https://fastupload.io/7qOrbYHFY6LniZO",
    filename="check_download/stockwatcher.zip",
)

('check_download/stockwatcher.zip',
 <http.client.HTTPMessage at 0x7fc10b7cc460>)

In [28]:
# Unpack every member of my_data.zip into the check_download directory.
with ZipFile(file='check_download/my_data.zip', mode='r') as archive:
    archive.extractall(path='check_download')

In [48]:
# Unpack every member of stockwatcher.zip into the check_download directory.
# ZipFile raises BadZipFile when the file on disk is not a valid archive.
with ZipFile(file='check_download/stockwatcher.zip', mode='r') as archive:
    archive.extractall(path='check_download')

BadZipFile: File is not a zip file

In [20]:
# Try to pull the single member named 'stockwatcher' out of my_data.zip.
unzip(
    source_path='check_download/my_data.zip',
    destination_path='check_download/stockwatcher',
    data_filename='stockwatcher',
)

File %s was not able to be unzipped check_download/my_data.zip


In [1]:
import re

# Two capture groups: the text between "s3://" and the first "/", and the
# remainder of the path after that "/".
regex = r"s3://([\w._-]+)/([\w./_-]+)"
matched = re.compile(regex).match(
    's3://2022-msia423-dhatwalia-narin/raw_data/recent_transactions.csv'
)
print(matched)

<re.Match object; span=(0, 66), match='s3://2022-msia423-dhatwalia-narin/raw_data/recent>


In [23]:
# Second capture group: everything after the first "/" following "s3://".
matched[2]

'raw_data/recent_transactions.csv'

In [19]:
# All capture groups as a tuple: (text before the first "/", text after it).
matched.groups()

('2022-msia423-dhatwalia-narin', 'raw_data/recent_transactions.csv')