In [1]:
import logging
import math
from datetime import datetime
from random import randint
from urllib import request
from urllib.error import URLError
from urllib.parse import urlparse

# Assignment 04
#### Python Basics IV - Functions, Unit Testing, and Logging

This tutorial was written by Terry L. Ruas (University of Göttingen). References to the external contributors whose material was adapted or served as inspiration are listed in the Acknowledgments section (end of the document).

This notebook will cover the following tasks:

1. Lambda functions
2. List comprehensions
3. Unit Test
4. HTTP Request
5. Logging
6. Download File

## Task 01 – Lambda functions
Python supports lambda functions as a handy way to define small, anonymous, i.e., unnamed, functions inline. The basic syntax for lambda functions is:
$$
 \textbf{lambda}\text{ parameter1, parameter2, ... : }\textbf{expression}
$$
Use a lambda function to retain only the even values in an array of integers. Test your function with an input array of your choosing. Print the input array and the filtered output array to stdout.

In [2]:
def lambdarr(arr):
    """Return a list with only the even integers of ``arr``, in their original order."""
    is_even = lambda n: n % 2 == 0
    # filter() yields a lazy iterator, so list() is needed to materialise it.
    # Wrapping the call in [] instead would only build a one-element list that
    # holds the filter object itself.
    evens = filter(is_even, arr)
    return list(evens)

In [3]:
# 20 random integers; randint() includes both bounds, so values range over 0..1001.
testliste = [randint(0, 1001) for _ in range(20)]
# Input list on the first line, filtered (even-only) list on the second.
print(testliste, lambdarr(testliste), sep="\n")

[626, 249, 181, 788, 663, 350, 334, 573, 884, 806, 919, 337, 237, 229, 470, 682, 562, 995, 557, 218]
[626, 788, 350, 334, 884, 806, 470, 682, 562, 218]


## Task 02 – List comprehensions
Python supports list comprehension. The basic syntax of list comprehensions is:
$$
\text{L = [<expression> for <elem> in <iterable> if <condition>]}
$$
Use list comprehensions to write a Python function *remove_long_words()* that:
- accepts a sentence s and an integer n as input parameters
- uses the *split()* function of String objects to split the sentence into words
- stores the individual words in a list
- removes all words that are longer than n characters from the list, thereby creating a new list
- prints the list to stdout

In [4]:
def remove_long_word(sentence, n):
    """Split ``sentence`` on whitespace and return the words of at most ``n`` characters.

    Args:
        sentence: Text to split with ``str.split()``.
        n: Maximum word length (inclusive) that is kept.

    Returns:
        A new list with the short words in their original order.
    """
    short_words = []
    for token in sentence.split():
        if len(token) <= n:
            short_words.append(token)
    return short_words

In [5]:
# Keep only the words with at most 7 characters; the returned list is shown as the cell output.
remove_long_word("This tutorial was written by Terry L. Ruas (University Göttingen).", 7)

['This', 'was', 'written', 'by', 'Terry', 'L.', 'Ruas']

## Task 03 – Unit Test
The following algorithm in Python converts numbers in decimal representation to binary.
1. Develop a unit test that checks for values in the interval \[-1,3\] whether the algorithm returns the
expected results.
2. Adjust the algorithm, so it passes the unit test developed in 1). Rename the function to
*decimal_to_binary_correct()*

In [6]:
def decimal2binary(n):
    """Convert a positive decimal integer to an int whose digits are its binary form.

    Example: ``decimal2binary(6)`` returns ``110``.

    This is the unmodified reference algorithm of the task: for ``n <= 0`` the
    loop never runs, no digits are collected and ``int('')`` raises ValueError.
    """
    digits = []
    while n > 0:
        # collect the remainders; they come out least-significant bit first
        digits.append(str(n % 2))
        n = math.floor(n / 2)
    digits.reverse()  # most-significant bit first
    return int(''.join(digits))


In [7]:
def eigenbin(n):
    """Reference converter built on ``bin()``: return the binary digits of ``n`` as an int.

    ``bin()`` yields e.g. ``'0b110'`` or ``'-0b110'``; the prefix is sliced off
    and the sign is re-applied to the resulting number.
    """
    text = bin(n)
    if n < 0:
        # skip the leading '-0b'
        return -int(text[3:])
    # skip the leading '0b'
    return int(text[2:])

In [8]:
print(decimal2binary(6))
print(eigenbin(6))

# negative values not considered in decimal2binary()
print(eigenbin(-1))
try:
    print(decimal2binary(-1))
except ValueError as err:
    # The failure is the point of this demonstration: show it without
    # aborting the cell, so the notebook still runs top to bottom.
    print("ValueError:", err)


110
110
-1


ValueError: invalid literal for int() with base 10: ''

In [9]:
def bincheck(converter=None):
    """Unit test: check a decimal-to-binary converter on every integer in [-1, 3].

    Args:
        converter: Function under test, called with one int. Defaults to
            ``decimal2binary`` so the plain ``bincheck()`` call keeps testing
            the original algorithm; pass another function to test a fix.

    Raises:
        AssertionError: If the converter returns a wrong value; the message
            names the offending input.
        Exception: Whatever the converter itself raises is propagated unchanged.
    """
    if converter is None:
        converter = decimal2binary
    # Explicit expected values keep the test independent of any other implementation.
    expected_results = {-1: -1, 0: 0, 1: 1, 2: 10, 3: 11}
    for n, expected in expected_results.items():
        actual = converter(n)
        assert actual == expected, (
            f"not correct xD: input {n} gave {actual}, expected {expected}"
        )
        

In [10]:
# The original algorithm is expected to fail this check; report the failure
# instead of aborting the cell so the notebook still runs top to bottom.
try:
    bincheck()
except (AssertionError, ValueError) as err:
    print(f"{type(err).__name__}: {err}")

ValueError: invalid literal for int() with base 10: ''

In [11]:
def d2b_correct(num):
    """Convert a decimal integer to an int whose digits are its binary form.

    Handles zero and negative input: ``d2b_correct(0) == 0``,
    ``d2b_correct(6) == 110`` and ``d2b_correct(-8) == -1000``.

    Args:
        num: Integer to convert.

    Returns:
        The binary digits of ``abs(num)`` read as a decimal int, carrying the
        sign of ``num``.
    """
    if num == 0:
        # The loop below would collect no digits and int('') would raise ValueError.
        return 0
    n = abs(num)
    digits = []
    while n > 0:
        digits.append(str(n % 2))
        # Integer floor division stays exact for arbitrarily large ints,
        # unlike math.floor(n / 2), which goes through a float.
        n //= 2
    # Remainders were collected least-significant bit first.
    result = int(''.join(reversed(digits)))
    return -result if num < 0 else result

In [12]:
# Run the same positive input through all three converters for a side-by-side comparison.
for label, convert in (
    ("decimal2binary:\t", decimal2binary),
    ("eigenbin: \t", eigenbin),
    ("d2b_correct: \t", d2b_correct),
):
    print(label, convert(8))

decimal2binary:	 1000
eigenbin: 	 1000
d2b_correct: 	 1000


In [13]:
# decimal2binary is left out here: it raises ValueError for negative input.
for label, convert in (
    ("eigenbin: \t", eigenbin),
    ("d2b_correct: \t", d2b_correct),
):
    print(label, convert(-8))

eigenbin: 	 -1000
d2b_correct: 	 -1000


## Task 04 – HTTP Request
Working with HTTP connections is essential for many data gathering tasks. The Python library *urllib* provides all functionality we need.
Write a Python function *open_url(url)* that:
- uses urllib to establish a HTTP connection to an arbitrary website
- retrieves and prints the first 200 characters of the html resource, i.e. the html source code, of the chosen website
- handles the exceptions thrown by the *urllib.request* function

FYI: The basic syntax for exception handling in Python is as follows:
```
try:
    ...
    return ...
except SomeError1 as e:
    # error-specific exception handling
except SomeError2 as e:
    # error-specific exception handling
except:
    # general exception handling
```

In [14]:
def open_url(url):
    """Fetch ``url`` and return the first 200 characters of the resource.

    If ``url`` is not a valid URL (``urlopen`` raises ValueError, e.g. because
    the scheme is missing), the user is asked on stdin for a corrected URL and
    the request is retried.

    Args:
        url: Address to open, including the scheme ("http://" or "https://").

    Returns:
        The first 200 characters of the decoded body, or ``None`` if the
        connection failed (the error is printed).
    """
    while True:
        try:
            # http connection with request.urlopen()
            with request.urlopen(url) as response:
                # retrieve html
                htmlsource = response.read()
        except ValueError:
            print("""
            Value error: Please provide a valid url in the format 
            "http://your_url" or "https://your_url"
            """)
            url = input()
            continue
        except OSError as err:
            # URLError and HTTPError are OSError subclasses, so this also covers
            # timeouts and dropped connections. Give up instead of retrying the
            # same failing URL in an endless loop.
            print("An error occured: ", err)
            return None
        # Decode outside the try block and never raise on undecodable bytes:
        # UnicodeDecodeError is a ValueError and would otherwise be misreported
        # as an invalid URL.
        text = htmlsource.decode('utf-8', errors='replace')
        # output first 200 chars
        return text[:200]
    

In [15]:
# URL without a scheme: exercises the ValueError branch, which prompts for a corrected URL on stdin.
open_url("www.fast.com")


            Value error: Please provide a valid url in the format 
            "http://your_url" or "https://your_url"
            
http://fast.com/


'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n<!-- htmllint id-class-style="dash" -->\n<!-- htmllint attr-name-style="dash" -->\n<html>\n    <head profile="ht'

In [16]:
# Fetch one plain-http and one https page (works with https as well),
# separated by a blank line.
for position, address in enumerate(("http://example.com/", "https://python.org/")):
    if position:
        print()
    print(open_url(address))

<!doctype html>
<html>
<head>
    <title>Example Domain</title>

    <meta charset="utf-8" />
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <meta name="viewport" conten

<!doctype html>
<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9">   <![endif]-->
<!--[if IE 7]>      <html class="no-js ie7 lt-ie8 lt-ie9">          <![endif]-->
<!--[if IE 8]>      <h


## Task 05 – Logging
The logging module in Python provides functionality for logging and debugging purposes. Use the logging module to extend the error handling for the function that you implemented to establish a HTTP connection (Task 4). All exceptions thrown by your function shall be logged as errors.

To accomplish the task:
- write a Python function *init_log(file_name, file_mode, level, format, date_format)* that initializes a custom log file to which all debugging information and errors are appended using a format that includes the date, time, level and the message of the logging event
- log occurring errors by calling *logging.error(...)*
- close the log after completing your task by calling *logging.shutdown()*

If you choose not to complete Task 4, test the logging functionality with a few examples of your own.

In [17]:
# filemode = 'a' is append; 'w' would be write (e.g. overwrite old log if it exists)

def init_log(file_name, file_mode='a', level=logging.DEBUG, logformat='%(asctime)s - %(levelname)s - %(message)s', date_format='%Y-%m-%d %H:%M:%S'):
    """Configure the root logger to write to ``file_name``.

    Args:
        file_name: Path of the log file.
        file_mode: Mode used to open the file; 'a' appends, 'w' overwrites.
        level: Minimum level that is recorded.
        logformat: Record layout; the default holds date/time, level and message.
        date_format: ``strftime`` pattern used for ``%(asctime)s``.
    """
    # logging.basicConfig() silently does nothing once the root logger has
    # handlers, so a second init_log() call (e.g. with another file name) would
    # be ignored. Detach and close the existing handlers first.
    root_logger = logging.getLogger()
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()
    # Set up basic configuration using logging.basicConfig
    logging.basicConfig(filename=file_name, filemode=file_mode, level=level, format=logformat, datefmt=date_format)
    

In [18]:
def open_url(url):
    """Fetch ``url`` with logging and return the first 200 characters of the resource.

    Progress is logged at INFO level and every handled exception at ERROR
    level to "open_url.log". If ``url`` is not a valid URL (``urlopen`` raises
    ValueError), the user is asked on stdin for a corrected URL and the request
    is retried.

    Args:
        url: Address to open, including the scheme ("http://" or "https://").

    Returns:
        The first 200 characters of the decoded body, or ``None`` if the
        connection failed.
    """
    init_log("open_url.log")
    try:
        while True:
            try:
                # http connection with request.urlopen()
                with request.urlopen(url) as response:
                    logging.info(f"opened url {url} as HTTPResponse object")
                    # retrieve html
                    htmlsource = response.read()
                    logging.info(f"read in html from {url}")
            except ValueError:
                # The task asks for every exception to be logged as an error.
                logging.error(f"VALUE ERROR for {url}")
                print("""
            Value error: Please provide a valid url in the format 
            "http://your_url" or "https://your_url"
            """)
                url = input()
                continue
            except OSError as err:
                # URLError and HTTPError are OSError subclasses, so this also
                # covers timeouts and dropped connections. Give up instead of
                # retrying the same failing URL in an endless loop.
                logging.error("%s", err)
                print("An error occured: ", err)
                return None
            # Never raise on undecodable bytes: UnicodeDecodeError is a
            # ValueError and would be misreported as an invalid URL.
            text = htmlsource.decode('utf-8', errors='replace')
            logging.info("decoded html")
            # output first 200 chars
            return text[:200]
    finally:
        # Runs on every exit path; placed after the loop it was unreachable.
        logging.shutdown()
    

## Task 06 – Download File
In Task 4, you used the *urllib* library to establish a http connection. You can also use the *urllib* library to perform simple file downloads.

Write a Python function *download_file(url, path)* that:
- checks whether the input URL points to a .txt file
- if the input URL points to a .txt file, uses the *urllib* library to download and write the text file to the given path on your machine
- logs an error “No text file found at given URL, download aborted!” to the log file created in Task 5 if the input URL does not point to a .txt file.
- properly handles exceptions

Use the *download_file()* function to download William Shakespeare’s drama Macbeth as a plain
text file from: [Macbeth](https://ia802707.us.archive.org/1/items/macbeth02264gut/0ws3410.txt)

In [19]:
# Plain-text edition of Macbeth used for the download test.
testurl = "https://ia802707.us.archive.org/1/items/macbeth02264gut/0ws3410.txt"
# Show the text after the last dot, i.e. the file extension.
print(testurl.rpartition(".")[2])

txt


In [20]:
def download_file(url, path):
    """Download the .txt file at ``url`` and store it at ``path``.

    Errors are logged to "download_file.log". If ``url`` does not point to a
    .txt file, "No text file found at given URL, download aborted!" is logged
    and nothing is downloaded.

    Args:
        url: Address of the text file, including the scheme.
        path: Local file path to write; an existing file is replaced.

    Returns:
        None.
    """
    init_log("download_file.log")
    try:
        # Only look at the path component so a "?query" or "#fragment" suffix
        # cannot hide the extension.
        if not urlparse(url).path.lower().endswith(".txt"):
            logging.error("No text file found at given URL, download aborted!")
            return

        with request.urlopen(url) as response:
            content = response.read()

        # Save to file. The raw bytes are written unchanged, which avoids
        # guessing the encoding; 'wb' replaces an old copy instead of appending
        # a second one when the cell is re-run.
        with open(path, 'wb') as download:
            download.write(content)
        logging.info("successfully retrieved txt file")
    except (ValueError, TypeError, AttributeError) as err:
        # urlopen raises ValueError for malformed URLs; the other two cover
        # non-string input.
        logging.error("Invalid URL %r: %s", url, err)
        print("Please provide a valid URL")
    except URLError as err:
        # Connection or HTTP status problem (HTTPError is a URLError subclass).
        logging.error("Download of %s failed: %s", url, err)
        print("An error occured: ", err)
    except OSError as err:
        # Remaining I/O failures, e.g. the target path cannot be written.
        logging.error("Could not save %s to %s: %s", url, path, err)
        print("An error occured: ", err)
    finally:
        logging.shutdown()

In [21]:

# Download Macbeth from the .txt URL stored in testurl to macbeth.txt in the working directory.
download_file(testurl, "./macbeth.txt")