# Python code profiling
- Code / Example inspired by Sebastian Mathot: 
    - https://www.youtube.com/watch?v=8qEnExGLZfY

---

## Goal: Find all duplicate movies from a text file of 10,000 movie titles.

In [15]:
def read_movies(src):
    """Load movie titles from the text file at ``src``.

    The file is read in one go and split into lines; each line is treated
    as one title and lowercased.

    Returns a list of lowercased titles, in file order.
    """
    with open(src) as handle:
        contents = handle.read()

    # The file is closed by now; everything below works on the in-memory text.
    return [title.lower() for title in contents.splitlines()]

In [20]:
def is_duplicate(item, collection):
    """Tell whether ``item`` (e.g. a movie title) occurs in ``collection``.

    ``collection`` is any container of other titles (e.g. a list).

    Returns True when a match is found, and False when the whole collection
    has been checked without finding one (including when it is empty).
    """
    # The membership test already yields exactly the True/False we need.
    return item in collection

In [17]:
@profile
def find_duplicate_movies(src='movies.txt'):
    """Collect the movie titles that occur more than once in the file ``src``.

    Titles are taken off the end of the list one at a time and looked up in
    the titles that are still left. Consequently the result is ordered from
    the end of the file towards the start, and a title occurring k times is
    reported k - 1 times.

    Returns a list of the (lowercased) duplicate titles.
    """
    remaining = read_movies(src)
    found = []

    while remaining:
        candidate = remaining.pop()

        # Nothing left that matches: this was the last (or only) occurrence.
        if not is_duplicate(candidate, remaining):
            continue

        found.append(candidate)

    return found

In [14]:
#%timeit duplicates = find_duplicate_movies()

---

### cProfile decorator:

In [9]:
import cProfile
import functools
import io
import pstats


def profile(fnc):

    """A decorator that uses cProfile to profile a function.
       Starts the profile before executing a function, then executes the function,
       then stops the profile, then prints out a diagnostics report
       (sorted by cumulative time).

       The profiler is stopped and the report is printed even when the
       wrapped function raises; the exception then propagates unchanged.
       The wrapper keeps the wrapped function's name and docstring.

       Lots of boilerplate code from the Python 3 documentation:
       https://docs.python.org/3/library/profile.html#profile.Profile
       """

    @functools.wraps(fnc)  # keep fnc's __name__/__doc__ on the wrapper
    def inner(*args, **kwargs):

        pr = cProfile.Profile()
        pr.enable()
        try:
            return fnc(*args, **kwargs)
        finally:
            # Always stop profiling so a failing call does not leave the
            # profiler switched on, and still show what was measured.
            pr.disable()
            s = io.StringIO()
            sortby = 'cumulative'
            ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
            ps.print_stats()
            print(s.getvalue())

    return inner

In [18]:
#cProfile.run('find_duplicate_movies()')

In [21]:
# Run the search on the default 'movies.txt'. Because the function is wrapped
# by @profile, the cProfile report is printed first; the returned list of
# duplicates is then shown as the cell's result.
find_duplicate_movies()

         30276 function calls in 0.447 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.004    0.004    0.447    0.447 <ipython-input-17-87c1d2ff59d4>:1(find_duplicate_movies)
    10000    0.437    0.000    0.437    0.000 <ipython-input-20-75331690f179>:1(is_duplicate)
        1    0.000    0.000    0.005    0.005 <ipython-input-15-0d67e89540a9>:1(read_movies)
        1    0.001    0.001    0.003    0.003 <ipython-input-15-0d67e89540a9>:6(<listcomp>)
    10000    0.001    0.000    0.001    0.000 {method 'lower' of 'str' objects}
        1    0.001    0.001    0.001    0.001 {method 'splitlines' of 'str' objects}
    10000    0.001    0.000    0.001    0.000 {method 'pop' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'read' of '_io.TextIOWrapper' objects}
        1    0.000    0.000    0.000    0.000 {built-in method io.open}
        1    0.000    0.000    0.000    0.000 /opt/anacon

['zookeeper (2011)',
 'miracle on 34th street (1994)',
 'babylon 5: thirdspace (1998)',
 'police academy 6: city under siege (1989)',
 'war of the worlds (2005)',
 'chaplin (1992)',
 'twelfth night (1996)',
 'memento (2000)',
 'fire and ice (2008)',
 'stan helsing (2009)',
 'intimate strangers (confidences trop intimes) (2004)',
 'anything for her (pour elle) (2008)',
 'simpatico (1999)',
 'high school musical 2 (2007)',
 'big blue, the (grand bleu, le) (1988)',
 'bedazzled (1967)',
 'remember me (ricordati di me) (2003)',
 'saturn 3 (1980)',
 '11:14 (2003)',
 "harry potter and the sorcerer's stone (a.k.a. harry potter and the philosopher's stone) (2001)",
 'thousand words, a (2012)',
 'carnosaur (1993)',
 'cold fish (tsumetai nettaigyo) (2010)',
 'very potter sequel, a (2010)',
 'antichrist (2009)',
 'captain horatio hornblower r.n. (1951)',
 'postman always rings twice, the (1981)',
 'red violin, the (violon rouge, le) (1998)',
 'sorority house massacre ii (1990)',
 'just jim (2015)'