# Python Functional Programming

### Set-Up

In [1]:
from pyspark import SparkConf, SparkContext
conf = (SparkConf()
            .setMaster("local[*]")
            .setAppName("My app")
            .set("spark.executor.memory","1g"))
sc = SparkContext(conf = conf)

In [2]:
# New RDD, not so common
pairs = sc.wholeTextFiles("file:///usr/local/spark/licenses/")

In [3]:
x = pairs.take(1)

### Data Structures

__Strings__

In [95]:
a = 'as you like it'

In [96]:
a.capitalize()
a.title()

'As You Like It'

In [97]:
print("P\ti\nca\Osso")
print(r"C:\myscript.py")

P	i
ca\Osso
C:\myscript.py


In [98]:
print(a[:8])
print(a[0::3])

as you l
ay ki


<br>

__Lists__

[ref](http://www.bogotobogo.com/python/python_lists.php)

In [99]:
b = list(a)
b

['a', 's', ' ', 'y', 'o', 'u', ' ', 'l', 'i', 'k', 'e', ' ', 'i', 't']

In [100]:
print(b.pop())
b.extend('s')
print(b)

t
['a', 's', ' ', 'y', 'o', 'u', ' ', 'l', 'i', 'k', 'e', ' ', 'i', 's']


In [101]:
b.append('es')
print(b)

['a', 's', ' ', 'y', 'o', 'u', ' ', 'l', 'i', 'k', 'e', ' ', 'i', 's', 'es']


In [102]:
b.insert(0,'SK')
print(b)

['SK', 'a', 's', ' ', 'y', 'o', 'u', ' ', 'l', 'i', 'k', 'e', ' ', 'i', 's', 'es']


In [103]:
b[(len(b)-1)]
b.remove('es')
print(b)

['SK', 'a', 's', ' ', 'y', 'o', 'u', ' ', 'l', 'i', 'k', 'e', ' ', 'i', 's']


In [104]:
b.sort()
print(b)
print( b.reverse() )
print(b)

[' ', ' ', ' ', 'SK', 'a', 'e', 'i', 'i', 'k', 'l', 'o', 's', 's', 'u', 'y']
None
['y', 'u', 's', 's', 'o', 'l', 'k', 'i', 'i', 'e', 'a', 'SK', ' ', ' ', ' ']


__List comprehension__

`[expression(i) for i in old_list if filter(i)]`

In [116]:
[i.upper() for i in b if i in ['s','a','e']]

['S', 'S', 'E', 'A']

In [133]:
c = list(range(0,10))
print(c)
print( [i**2 for i in c] )

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


__Lambda expressions__

[ref](https://www.bogotobogo.com/python/python_functions_lambda.php)

In [251]:
death = [
    ('James', 'Dean', 24),
    ('Jimi', 'Hendrix', 27),
    ('George', 'Gershwin', 38),
]

sorted(death, key=lambda age: age[2])

[('James', 'Dean', 24), ('Jimi', 'Hendrix', 27), ('George', 'Gershwin', 38)]

__Dictionaries, Tuples, Sets__

In [103]:
 M = [ [1, 2, 3],
          [4, 5, 6],
          [7, 8, 9] ]

In [104]:
# Create a set of row sums
{sum(A) for A in M}
{24, 6, 15}
# Creates key/value table of row sums
{i : sum(M[i]) for i in range(3)}

{0: 6, 1: 15, 2: 24}

__Numpy Arrays__

[ref](http://www.bogotobogo.com/python/python_numpy_array_tutorial_basic_A.php)

In [130]:
import numpy as np
a = np.array([[1, 2], [3, 4]], dtype=np.float64) 
print(a)

[[ 1.  2.]
 [ 3.  4.]]


In [125]:
a*2

array([[ 2.,  4.],
       [ 6.,  8.]])

In [126]:
a+a

array([[ 2.,  4.],
       [ 6.,  8.]])

In [127]:
a*a

array([[  1.,   4.],
       [  9.,  16.]])

In [132]:
np.random.random((2,5))

array([[ 0.2492472 ,  0.31904546,  0.18686166,  0.77883412,  0.72418304],
       [ 0.15619785,  0.64542489,  0.491113  ,  0.45878505,  0.57735399]])

__Numpy Matrices__

[ref](http://www.bogotobogo.com/python/python_numpy_matrix_tutorial.php)

__Numpy with Pandas__

[ref](http://www.bogotobogo.com/python/python_Pandas_NumPy_Matplotlib.php)

__Iterators, iterables, generators__
* [ref-1](https://stackoverflow.com/questions/2776829/difference-between-pythons-generators-and-iterators)
* [ref-2](https://stackoverflow.com/questions/231767/what-does-the-yield-keyword-do?rq=1)

```text
Iterators: __next__, __iter__
iterables
generators: __yield__

```


Iterator is a more general concept than iterable: any object whose class has a next method (`__next__` in Python 3) and an `__iter__` method that does return self.

Iterables use iterators.  Everything you can use "for... in..." on is an iterable; lists, strings, files... .  But iterables are stored in memory.

A generator is built by calling a function that has one or more yield expressions.  Generators are iterators, a kind of iterable you can only iterate over once. Generators do not store all the values in memory, they generate the values on the fly.  Syntax is just the same except you used `()` instead of `[]`

__Generators__

In [137]:
gen = (x**2 for x in range(0,3))
[print(x) for x in gen]
[print(x) for x in gen]

0
1
4


[]

In [246]:
#list comprehension
the_list = [2**x for x in range(5)]
#generator expression
the_generator = (2**x for x in range(5))
type(the_list), type(the_generator)

(list, generator)

In [247]:
iterable = the_generator.__iter__()
print( next(iterable) )
print( next(iterable) )

1
2


In [249]:
iterable.__next__()

4

In [228]:
#generator object that supports the iteration protocol
list(the_generator)

[2, 4, 8, 16]

In [220]:
def counter(n):
    print('generator created')
    i = 0
    while n > i:
        yield i
        print(i)
        i += 1

In [221]:
c = counter(5)
next(c)

generator created


0

In [229]:
list(c)

0
1
2
3
4


[1, 2, 3, 4]

In [223]:
list(range(5))

[0, 1, 2, 3, 4]

In [225]:
for i in range(5):
    print(i)

0
1
2
3
4


In [236]:
#create an iterable from a collection
x = iter( [0,1,2,3,4,5] )
print(next(x))
print(next(x))

0
1


### Regular Expressions

[python regex](https://regexone.com/references/python)

In [28]:
import re

In [32]:
y = x[0][1]

In [59]:
y = y.lower()
y

'the mit license (mit)\n\ncopyright (c) <year> <copyright holders>\n\npermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "software"), to deal in the software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the software, and to permit persons to whom the software is furnished to do so, subject to the following conditions:\n\nthe above copyright notice and this permission notice shall be included in all copies or substantial portions of the software.\n\nthe software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. in no event shall the authors or copyright holders be liable for any claim, damages or other liability, whether in an action of contract, tort or otherwise, arisi

In [129]:
regex = re.compile(r"\b(a|an|the|this)\b")
y_noart = regex.sub(r"", y)
y_noart

' mit license (mit)\n\ncopyright (c) <year> <copyright holders>\n\npermission is hereby granted, free of charge, to any person obtaining  copy of  software and associated documentation files ( "software"), to deal in  software without restriction, including without limitation  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of  software, and to permit persons to whom  software is furnished to do so, subject to  following conditions:\n\n above copyright notice and  permission notice shall be included in all copies or substantial portions of  software.\n\n software is provided "as is", without warranty of any kind, express or implied, including but not limited to  warranties of merchantability, fitness for  particular purpose and noninfringement. in no event shall  authors or copyright holders be liable for any claim, damages or other liability, whether in  action of contract, tort or otherwise, arising from, out of or in connection with  software 

In [159]:
regex2 = re.compile(r"\b(in|of|whom|with|to)\b")
y_noprep = regex2.sub(r"", y_noart)

In [160]:
match = re.findall('(\w+)\s+software', y_noprep)
#match = re.finditer('software', y)
#l = list(match)

In [162]:
match

['copy', 'deal', 'copies', 'persons', 'portions', 'connection', 'dealings']

In [63]:
match = re.search('mit', y)
print( '%s %s' % (match.start(), match.end()) )

4 7


In [54]:
match = re.search('(Software)', y)
match.group(0)

'Software'

In [167]:
y1 = re.split( '\s',y)
y1[0:5]

['the', 'mit', 'license', '(mit)', '']

In [171]:
z = y.split(' ')
z[0:5]

['the', 'mit', 'license', '(mit)\n\ncopyright', '(c)']

### Context Managers

[ref](https://jeffknupp.com/blog/2016/03/07/python-with-context-managers/)

__WRONG__: this will _leak the file descriptor_
```
files = []
for x in range(100000):
    files.append(open('foo.txt', 'w'))
```
Failure to close file descriptors will lead you to discover that there is (usually) a limit to the number of file descriptors a process can be assigned. 

The following command provides the upper limit of the number of handles that can a process can be assigned.

In [21]:
! ulimit -n

524288


In [20]:
with open('README.md', 'r') as infile:
    for line in infile:
        print(line)

# Apache Spark



Spark is a fast and general cluster computing system for Big Data. It provides

high-level APIs in Scala, Java, Python, and R, and an optimized engine that

supports general computation graphs for data analysis. It also supports a

rich set of higher-level tools including Spark SQL for SQL and DataFrames,

MLlib for machine learning, GraphX for graph processing,

and Spark Streaming for stream processing.



<http://spark.apache.org/>





## Online Documentation



You can find the latest Spark documentation, including a programming

guide, on the [project web page](http://spark.apache.org/documentation.html).

This README file only contains basic setup instructions.



## Building Spark



Spark is built using [Apache Maven](http://maven.apache.org/).

To build Spark and its example programs, run:



    build/mvn -DskipTests clean package



(You do not need to do this if you downloaded a pre-built package.)



You can build Spark using more than one thread by usin

In [None]:
# Create a context manager by using __enter__() and __exit__() methods

class File():

    def __init__(self, filename, mode):
        self.filename = filename
        self.mode = mode

    def __enter__(self):
        self.open_file = open(self.filename, self.mode)
        return self.open_file

    def __exit__(self, *args):
        self.open_file.close()

files = []
for _ in range(10000):
    with File('foo.txt', 'w') as infile:
        infile.write('foo')
        files.append(infile)

In [None]:
#Everything before the call to yield is considered the code for __enter__(), after is __exit__()
from contextlib import contextmanager

@contextmanager
def open_file(path, mode):
    the_file = open(path, mode)
    yield the_file
    the_file.close()

files = []

for x in range(100000):
    with open_file('foo.txt', 'w') as infile:
        files.append(infile)

for f in files:
    if not f.closed:
        print('not closed')

### Decorators

### Closures

[ref](http://www.bogotobogo.com/python/python_closure.php)

In [32]:
def startAt(start):
	def incrementBy(inc):
		return start + inc   #or, return lambda inc: start+inc
	return incrementBy

f = startAt(10)
g = startAt(100)

print ('type(f)=%s' %(type(f)))
print ('f.__closure__=%s' %(f.__closure__))
print ('type(f.__closure__[0])=%s' %(type(f.__closure__[0])))
print ('f.__closure__[0].cell_contents=%s' %(f.__closure__[0].cell_contents))

print ('type(g)=%s' %(type(g)))
print ('g.__closure__=%s' %(g.__closure__))
print ('type(g.__closure__[0])=%s' %(type(g.__closure__[0])))
print ('g.__closure__[0].cell_contents=%s' %(g.__closure__[0].cell_contents))

type(f)=<class 'function'>
f.__closure__=<cell at 0x7f244beccd08: int object at 0x7f2487567b40>
type(f.__closure__[0])=<class 'cell'>
f.__closure__[0].cell_contents=10
type(g)=<class 'function'>
g.__closure__=<cell at 0x7f247e4ac648: int object at 0x7f2487568680>
type(g.__closure__[0])=<class 'cell'>
g.__closure__[0].cell_contents=100


### Exceptions

[ref](http://www.bogotobogo.com/python/python_try_except_finally_raise_syntax_error.php)

In [None]:
#ValuError supports input type mismatch
while True:
   try:
      age = int(input("Type in your guess : Age of the Universe : " ))
      print(age)
      break
   except ValueError:
      print("Please make sure you type in an integer")
   except:
      break
   finally:
      print("age loop" )

In [None]:
#Create your own Exception classes
class Negative(Exception):
   pass
def oops():
   raise Negative()

try:
   age = int(input("Type in your guess : Age of the Universe : " ))
   print(age)
   if age <= 0:
      print('calling ooops')
      oops()
except ValueError:
   print("Please make sure you type in an integer")
except Negative:
   print("Please make sure you type in a positive integer")
except:
   print("Somethings wrong!")
finally:
   print("finally!" )

### Map / Reduce / Filter

### Style of Functional Programming

* [ref](http://www.bogotobogo.com/python/python_functional_programming.php)
* [docs](https://docs.python.org/2/howto/functional.html)
* [ibm, part1](https://www.ibm.com/developerworks/library/l-prog/index.html)
* [ibm, part2](https://www.ibm.com/developerworks/library/l-prog2/index.html)