# Python Data Science Toolbox - Part 1

## Writing Your Own Functions

In [None]:
# Anatomy of a Python function
def square(value):
    """Return ``value`` raised to the second power."""
    # The ``def`` line above is the function header; the function name plus its
    # parameter list form the signature. Everything indented is the body.
    return value ** 2                     # <- hand the result back to the call site


# Call square() with its one required argument, 4, and display the result
number_squared = square(4)
print(number_squared)

In [None]:
# String operators: "+" concatenates, "*" repeats
object1 = "data" + "analysis" + "visualization"   # str + str -> concatenation
object2 = 1 * 3                                   # int * int -> arithmetic product
object3 = "1" * 3                                 # str * int -> repetition

print(
    " object1: ", object1, "\n",
    "object2: ", object2, "\n",
    "object3: ", object3,
)

In [None]:
# A few built-in functions and what they hand back
x = 4.89
y1 = str(x)      # str() returns a new string
y2 = print(x)    # print() writes x to stdout; its return value is bound to y2

print(
    " type(x): ", type(x), "\n",
    "type(y1): ", type(y1), "\n",
    "type(y2): ", type(y2),
)

# type(y2) is NoneType: the built-in print() has no return value, so y2 is None

In [None]:
# shout() takes no parameters
def shout():
    """Print 'congratulations' followed by three exclamation marks."""
    print("congratulations" + "!" * 3)


# Invoke shout
shout()

In [None]:
# shout() now accepts one parameter, word
def shout(word):
    """Print ``word`` followed by three exclamation marks."""
    print(word + "!" * 3)


# Invoke shout with the string argument 'congratulations'
shout("congratulations")

In [None]:
# shout() with the parameter word, returning instead of printing
def shout(word):
    """Return ``word`` followed by three exclamation marks."""
    return word + "!!!"


# Pass 'congratulations' to shout and bind the returned string to yell
yell = shout("congratulations")
print(yell)

In [None]:
# shout() with two parameters, word1 and word2
def shout(word1, word2):
    """Return both words, each followed by '!!!', joined into one string."""
    marks = "!" * 3
    return (word1 + marks) + (word2 + marks)


# Both arguments are required; the combined string is bound to yell
yell = shout("congratulations", "you")
print(yell)

In [None]:
# Tuples
nums = 3, 4, 6            # the commas build the tuple; tuples are immutable
print("type(nums): ", type(nums))

# Unpack the three elements into separate names
(num1, num2, num3) = nums

print(
    " num1: ", num1, "\n",
    "num2: ", num2, "\n",
    "num3: ", num3,
)

In [None]:
# Returning several values at once by packing them into a tuple
def shout_all(word1, word2):
    """Return a 2-tuple holding each word with '!!!' appended."""
    marks = "!" * 3

    # Pack both results into a single tuple object: shout_words
    shout_words = (word1 + marks, word2 + marks)
    return shout_words


# The returned tuple is unpacked straight into yell1 and yell2
yell1, yell2 = shout_all("congratulations", "you")

print(yell1)
print(yell2)

In [None]:
# Same behaviour as the previous shout_all(), written more compactly
def shout_all(word1, word2):
    """Return ``(word1 + '!!!', word2 + '!!!')``."""
    # A comma-separated return builds the tuple implicitly
    return word1 + "!!!", word2 + "!!!"


# Unpack the tuple returned by shout_all() into yell1 and yell2
yell1, yell2 = shout_all("congratulations", "you")

print(yell1)
print(yell2)

In [None]:
import pandas as pd

df = pd.read_csv("./data/tweets.csv")

col = df['lang']                                # the column whose values are tallied: col
langs_count = {}                                # language -> number of occurrences

for entry in col:                               # visit every element of the column
    # Start unseen languages at 0, then count this occurrence
    langs_count[entry] = langs_count.get(entry, 0) + 1

print(langs_count)

In [None]:
import pandas as pd

# Read the tweets CSV (path relative to the working directory) into a DataFrame
tweets_df = pd.read_csv("./data/tweets.csv")

def count_entries(df, col_name):
    """Tally how many times each distinct value occurs in one column.

    Args:
        df: object indexable by column label (a pandas DataFrame in this notebook).
        col_name: label of the column to tally.

    Returns:
        dict mapping each value found in ``df[col_name]`` to its occurrence count.
    """
    tally = {}
    for value in df[col_name]:
        # Unseen values start from 0 before this occurrence is added
        tally[value] = tally.get(value, 0) + 1
    return tally

# Tally the values of the "source" column of tweets_df and show the dictionary
entry_counts = count_entries(tweets_df, "source")
print(entry_counts)

## Default Arguments, Variable-Length Arguments, and Scope

In [None]:
# Exploring scope: a local name versus the module-level (global) name
num = 5


def func1():
    """Print a local ``num``; the global ``num`` is not touched."""
    num = 3                  # this assignment creates a name local to func1()
    print(num)


def func2():
    """Print twice the global ``num``, then rebind the global ``num`` to 6."""
    global num               # make assignments below target the module-level num
    doubled = 2 * num        # reads the current global value
    print(doubled)
    num = 6                  # rebinds the global itself (this can be dangerous)


print("global value of num before func2 call: ", num)
func1()
func2()
print("global value of num after func2 call: ", num)

In [None]:
# Python built-in modules
import builtins
# dir() lists every name defined in the builtins module
list_of_built_in_functions = dir(builtins)

# First the number of names, then the names themselves, one print line each
print(len(list_of_built_in_functions), list_of_built_in_functions, sep="\n")

In [None]:
# A function defined inside another function
def three_shouts(word1, word2, word3):
    """
    Return a 3-tuple with '!!!' appended to each argument.
    The helper inner() is nested in (enclosed by) three_shouts().
    """
    def inner(word):
        """Return word with '!!!' appended."""
        return word + "!!!"

    # Apply the helper to the arguments in order and pack the results
    return tuple(inner(item) for item in (word1, word2, word3))


print(three_shouts('a', 'b', 'c'))

##### Closure - the nested (or inner) function remembers the state of its enclosing scope when called.
##### Thus, anything defined locally in the enclosing scope is available to the inner function even when the outer function has finished execution.

In [None]:
def echo(n):
    """Build and return a function that repeats its argument n times."""
    def inner_echo(word1):
        # n is taken from the enclosing echo() call (closure)
        return word1 * n

    # Hand back the function object itself, not the result of calling it
    return inner_echo


# echo(2) yields a function that doubles its input: twice
twice = echo(2)
print(twice)
print(type(twice))

# echo(3) yields a function that triples its input: thrice
thrice = echo(3)
print(thrice)
print(type(thrice))

# Each returned function still remembers the n it was created with
print(twice("hello"))
print(thrice("hello"))

In [None]:
# echo_shout(): a nested function rebinding a variable of its enclosing scope
def echo_shout(word):
    """Print word doubled, then print it again after a nested call appends '!!!'."""
    echo_word = word + word
    print(echo_word)

    def shout():
        """Append '!!!' to echo_word in the enclosing scope."""
        # nonlocal makes the assignment rebind echo_shout()'s echo_word
        nonlocal echo_word
        echo_word += "!" * 3

    shout()
    print(echo_word)


# Call echo_shout() with the argument 'hello2'
echo_shout("hello2")

In [None]:
def shout_echo(word1, echo=1):
    """Return word1 repeated ``echo`` times (once by default), followed by '!!!'."""
    return word1 * echo + '!!!'


# Rely on the default echo=1: no_echo
no_echo = shout_echo("Hey")

# Pass 5 for echo positionally: with_echo
with_echo = shout_echo("Hey", 5)

print(no_echo)
print(with_echo)

In [None]:
def shout_echo(word1, echo=1, intense=False):
    """Return word1 repeated ``echo`` times followed by '!!!'.

    When ``intense`` is truthy the repeated word is upper-cased first.
    """
    repeated = word1 * echo

    # Upper-case only the word part; the exclamation marks are appended after
    if intense:
        repeated = repeated.upper()

    return repeated + "!" * 3


# Keyword arguments echo=5 and intense=True: with_big_echo
with_big_echo = shout_echo("Hey", echo=5, intense=True)

# Default echo, intense=True: big_no_echo
big_no_echo = shout_echo("Hey", intense=True)

print(with_big_echo)
print(big_no_echo)

In [None]:
def gibberish(*args):
    """Concatenate all positional string arguments into one string.

    Args:
        *args: any number of strings; inside the function they arrive as a tuple.

    Returns:
        The arguments joined in order with no separator ("" when called with
        no arguments).

    Raises:
        TypeError: if any argument is not a string.
    """
    # str.join walks the args tuple once, avoiding repeated "+=" re-allocation
    hodgepodge = "".join(args)
    return hodgepodge


one_word = gibberish("luke")
many_words = gibberish("luke", "leia", "han", "obi", "darth")

print(one_word)
print(many_words)

In [None]:
def report_status(**kwargs):
    """Print out the status of a movie character.

    Args:
        **kwargs: arbitrary keyword arguments; inside the function they arrive
            as a dict. Values may be of any printable type.

    Each key is printed as "key: " on its own line, followed by its value on
    the next line, between a BEGIN and an END marker.
    """
    print("\nBEGIN: REPORT\n")

    for key, value in kwargs.items():      # iterate over the key-value pairs of kwargs
        print(key + ": ")                  # keys are always strings, so "+" is safe here
        print(value)                       # printed separately: values need not be strings

    print("\nEND REPORT")


# First call to report_status()
report_status(name="luke", affiliation="jedi", status="missing")

# Second call to report_status()
report_status(name="anakin", affiliation="sith lord", status="deceased")

# Third call to report_status(): extra keywords, including non-string values
report_status(name="yoda", affiliation="jedi master", status="deceased", height=1.1, weight=30, color="green")

In [None]:
import pandas as pd

# Read the tweets CSV (path relative to the working directory) into a DataFrame
tweets_df = pd.read_csv("./data/tweets.csv")

def count_entries(df, col_name="lang"):
    """Tally how many times each distinct value occurs in one column.

    Args:
        df: object indexable by column label (a pandas DataFrame in this notebook).
        col_name: label of the column to tally; defaults to "lang".

    Returns:
        dict mapping each value found in ``df[col_name]`` to its occurrence count.
    """
    counts = {}

    for item in df[col_name]:
        counts.setdefault(item, 0)   # first sighting: create the key at zero
        counts[item] += 1

    return counts

# Call count_entries() without a column name, so the default "lang" is used: result1
result1 = count_entries(tweets_df)

# Call count_entries() with the column name "source" passed positionally: result2
result2 = count_entries(tweets_df, "source")

print(result1)
print(result2)

In [None]:
import pandas as pd

# Read the tweets CSV (path relative to the working directory) into a DataFrame
tweets_df = pd.read_csv("./data/tweets.csv")

def count_entries(df, *args):
    """Tally value occurrences across any number of columns.

    Args:
        df: object indexable by column label (a pandas DataFrame in this notebook).
        *args: labels of the columns to tally.

    Returns:
        One dict shared by all requested columns, mapping each value to its
        total occurrence count; empty when no column labels are given.
    """
    totals = {}

    for label in args:
        for value in df[label]:
            if value not in totals:
                totals[value] = 0    # first sighting: create the key at zero
            totals[value] += 1

    return totals

# Call count_entries() with a single column name: result1
result1 = count_entries(tweets_df, "lang")

# Call count_entries() with two column names; both columns are tallied into one dictionary: result2
result2 = count_entries(tweets_df, "lang", "source")

print(result1)
print(result2)

## Lambda Functions and Error Handling

Some function definitions are simple enough that they can be converted to a lambda function.  
By doing this, you write fewer lines of code, which is especially handy when you're writing and maintaining big programs.  

In [None]:
# A lambda taking word1 and echo, bound to the name echo_word
echo_word = lambda word1, echo: word1 * echo

# Repeat "hey" five times: result
result = echo_word("hey", 5)

print(result)

In [None]:
# echo_word: an anonymous function of (word1, echo) that repeats word1 echo times
echo_word = lambda word1, echo: word1 * echo

# Invoke the lambda through its name: result
result = echo_word("hey", 5)

print(result)

The best use case for lambda functions, however, is when you want these simple functionalities to be anonymously embedded within larger expressions.  
What that means is that the functionality is not stored under a name in the environment, unlike a function defined with def. 

In [None]:
spells = ["protego", "accio", "expecto patronum", "legilimens"]

# map() applies the lambda to every spell and returns a lazy map object: shout_spells
shout_spells = map(lambda spell: spell + "!!!", spells)

print(type(shout_spells))

# Converting to a list consumes the map object and materialises the results
shout_spells_list = list(shout_spells)

print(shout_spells_list)

In [None]:
fellowship = [
    'frodo', 'samwise', 'merry', 'pippin', 'aragorn',
    'boromir', 'legolas', 'gimli', 'gandalf',
]

# filter() keeps only the names longer than six characters, lazily: result
result = filter(lambda name: len(name) > 6, fellowship)

print(type(result))

# Converting to a list consumes the filter object and materialises the matches
result_list = list(result)

print(result_list)

In [None]:
from functools import reduce
import numpy as np

stark = ['robb', 'sansa', 'arya', 'brandon', 'rickon']

# reduce() folds the list left to right; here the lambda concatenates names: result
result = reduce(lambda acc, name: acc + name, stark)

print(type(result))
print(result)

# The same folding idea with multiplication over 1..6
factorial_inputs = [1, 2, 3, 4, 5, 6]
n_factorial = reduce(lambda acc, k: acc * k, factorial_inputs)

print(type(n_factorial))
print(n_factorial)

# Same product, but folding over the array produced by np.linspace(1, 6, 6)
n_factorial_v2 = reduce(lambda acc, k: acc * k, np.linspace(1, 6, 6))

print(type(n_factorial_v2))
print(n_factorial_v2)

In [None]:
def shout_echo(word1, echo=1):
    """Concatenate echo copies of word1 and three exclamation marks at the end of the string.

    Returns:
        The shouted string, or "" (after printing a hint) when the repetition
        or the concatenation raises TypeError.

    NOTE: the operand types are not validated up front, so a call such as
    shout_echo(1, echo="accelerator") still succeeds, because 1 * "accelerator"
    is a valid string repetition.
    """
    shout_words = ""

    # Catch only TypeError (incompatible operand types). A bare ``except:``
    # would also swallow KeyboardInterrupt, SystemExit and unrelated errors.
    try:
        shout_words = word1 * echo + "!" * 3
    except TypeError:
        print("word1 must be a string and echo must be an integer.")

    return shout_words


working_echo = shout_echo("particle", echo=2)
not_working_echo1 = shout_echo("particle", echo="accelerator")
not_working_echo2 = shout_echo(1, echo="accelerator")

print(working_echo)
print(not_working_echo1)
print(not_working_echo2)       # this is a bug in the try-except block logic - the echo argument is non-sensical however the function still generates an output

In [None]:
def shout_echo(word1, echo=1):
    """Return word1 repeated ``echo`` times followed by '!!!'.

    Raises:
        ValueError: if ``echo`` is negative.
    """
    # Guard clause: reject negative repeat counts before doing any work
    if echo < 0:
        raise ValueError("echo must be greater than or equal to 0")

    return word1 * echo + '!!!'


shout_echo("particle", echo=2)

In [None]:
import pandas as pd

tweets_df = pd.read_csv("./data/tweets.csv")

# Lazily keep the entries of the "text" column whose first two characters are "RT": result
result = filter(
    lambda x: x[:2] == "RT",
    tweets_df["text"],
)

# Materialise the filter object: res_list
res_list = list(result)

# Show every retained tweet on its own line
for tweet in res_list:
    print(tweet)

In [None]:
import pandas as pd

# Read the tweets CSV (path relative to the working directory) into a DataFrame
tweets_df = pd.read_csv("./data/tweets.csv")

def count_entries(df, col_name='lang'):
    """Return a dictionary with counts of occurrences as value for each key.

    Args:
        df: object indexable by column label (a pandas DataFrame in this notebook).
        col_name: label of the column to tally; defaults to 'lang'.

    Returns:
        dict mapping each value in ``df[col_name]`` to its occurrence count, or
        None (after printing a message) when the column lookup raises KeyError.
    """
    # Keep the try body to the one lookup that signals a missing column, and
    # catch only KeyError, so unrelated failures are not misreported.
    try:
        col = df[col_name]
    except KeyError:
        print(f'The DataFrame does not have a {col_name} column.')
        return None

    cols_count = {}
    for entry in col:
        cols_count[entry] = cols_count.get(entry, 0) + 1

    return cols_count

# Request the column 'lang1' (presumably absent from tweets.csv - verify against the data)
# to exercise the missing-column path of count_entries()
result1 = count_entries(tweets_df, 'lang1')
print(result1)

In [None]:
def count_entries(df, col_name='lang'):
    """Tally how many times each distinct value occurs in one column.

    Args:
        df: DataFrame-like object exposing ``columns`` and column indexing.
        col_name: label of the column to tally; defaults to 'lang'.

    Returns:
        dict mapping each value found in ``df[col_name]`` to its occurrence count.

    Raises:
        ValueError: if ``col_name`` is not among ``df.columns``.
    """
    # Fail fast with an explicit error when the column is absent
    if col_name not in df.columns:
        raise ValueError('The DataFrame does not have a ' + col_name + ' column.')

    tally = {}
    for value in df[col_name]:
        try:
            tally[value] += 1
        except KeyError:
            # First sighting of this value
            tally[value] = 1

    return tally

# Tally the "lang" column; tweets_df is not defined in this cell, so it must already exist
result1 = count_entries(tweets_df, "lang")
print(result1)