# Session 10 Submissions by Sachin (Tupels and Named Tupels)

In [1]:
from faker import Faker
from collections import namedtuple
from collections import Counter
from datetime import datetime

In [2]:
# timer decorator
def timer_factory(repeat):
    """
    decorator factory to create a decorator.
    Inputs:
        repaet: number of times the decorated function will be called
    Returns:
        time_it: decorator
    """
    def time_it(func):
        """
        decorator function called to decorate a function by timing it.
        Inputs:
            func: name of function which has to be timed ( will be free variable)
        Returns:
            time_it: function
        """
        from time import perf_counter
        from functools import wraps
        # Check for a valid  function is passed to create a decorator
        if not hasattr(func, '__call__'):
            raise NameError(f"{func} is not a valid function")

        @wraps(func)
        def timer(*args, **kwargs):
            """
            find average time to execute a function while running it for n times.
            # Inputs:
                *args: positioned parameters
                **kwargs: named parameters
            # Returns:
                Returns output of function func(*args,**kwargs)

            # Functionality:
                Function check's how much time it takes on an average for n runs to execute a function

            For eg: after decorating fact function for repeat = 100, we will get how much time it takes for
            fact function to run for any given inputs on an average for 100 runs. In this case we will get -
            fact(5)
            Function fact takes average run time of 2.3930000179461787e-06 for 100 iterations
            """
            total_elapsed = 0
            for i in range(repeat):
                start = perf_counter()
                result = func(*args, **kwargs)
                end = perf_counter()
                total_elapsed += (end - start)
            avg_run_time = total_elapsed / repeat
            print(f'Function {func.__name__} takes average run time of {avg_run_time} for {repeat} iterations')
            return result, avg_run_time
        return timer
    return time_it

### 1. Use Faker library to get 10000 random profiles. Using namedtuple, calculate the largest blood type, mean-current_location, oldest_person_age and average age (add proper doc-strings)

In [3]:
# Create faker instance
fake = Faker()

In [4]:
# Get a dummy fake profile 
dummy = fake.profile()

In [5]:
# Explore keys
dummy.keys()

dict_keys(['job', 'company', 'ssn', 'residence', 'current_location', 'blood_group', 'website', 'username', 'name', 'sex', 'address', 'mail', 'birthdate'])

In [6]:
dummy

{'job': 'Physicist, medical',
 'company': 'Rush-Leblanc',
 'ssn': '476-25-4635',
 'residence': '99255 Erin Prairie Suite 819\nEast Matthew, WA 67939',
 'current_location': (Decimal('33.8836115'), Decimal('-49.397262')),
 'blood_group': 'AB-',
 'website': ['https://www.harvey.biz/'],
 'username': 'yking',
 'name': 'Lynn Dean',
 'sex': 'F',
 'address': '332 Jennifer Mountains Apt. 788\nEast Johnton, IA 54862',
 'mail': 'jeffreyhernandez@hotmail.com',
 'birthdate': datetime.date(1999, 11, 4)}

In [7]:
# Create a namedtuple class
FakePrf = namedtuple('FakePrf', dummy.keys())

In [8]:
# Check doc string
FakePrf.__doc__

'FakePrf(job, company, ssn, residence, current_location, blood_group, website, username, name, sex, address, mail, birthdate)'

In [9]:
# Overwrite doc string
FakePrf.__doc__ = "Represents random fake profile"

In [10]:
# Create a namedtuple class for fake profiles db
FakePrfDb = namedtuple('FakePrfDb', 'FakePrf_0')


In [11]:
# Overwrite doc string
FakePrfDb.__doc__ = "Represents database of random fake profile"

In [12]:
for i in range(10000):
    # get a fake profile
    f_prfl = fake.profile()
    
    # Create tuple from namedtuple class to store the f_prfl
    fake_profile = FakePrf(**f_prfl)
    
    
    # Add profile to Fake Profile DB
    if i==0:
        faker_db = FakePrfDb(fake_profile) 
    else:
        FakePrfDb = namedtuple('FakePrfDb', FakePrfDb._fields + ('FakePrf_'+str(i),))
        
        faker_db = FakePrfDb._make(faker_db + (fake_profile,))

In [13]:
#  calculate the largest blood type
@timer_factory(100)
def largest_bg(faker_db):
    """
    Return the most common blood group
    of fake profile db as named tuple
    """
    count = len(faker_db) 
    bl_grp = []
    for i in range(count):
        bl_grp.append(faker_db[i][5])
    return Counter(bl_grp).most_common(1)[0][0]

In [14]:
bg, time = largest_bg(faker_db)
print(bg)

Function largest_bg takes average run time of 0.0029362619999938034 for 100 iterations
AB-


In [15]:
# mean-current_location
@timer_factory(100)
def mean_current_location(faker_db):
    """
    Return the mean-current_location
    of fake profile db as named tuple
    """
    count = len(faker_db) 
    lat = []
    long = []
    for i in range(count):
        lat.append(faker_db[i][4][0])
        long.append(faker_db[i][4][1])
    return sum(lat)/count,sum(long)/count    

In [16]:
(lat,long),time = mean_current_location(faker_db)
print(lat,long)

Function mean_current_location takes average run time of 0.007118027999983951 for 100 iterations
0.378984243 1.3483958471


In [17]:
# oldest person's age
@timer_factory(100)
def oldest_person_age(faker_db):
    """
    Return the oldest person's age
    of fake profile db as named tuple
    """
    size = len(faker_db) 
    age = []
    days_in_year = 365.2425 
    today = datetime.date(datetime.today())
    for i in range(size):
        age.append((today - faker_db[i][12]).days / days_in_year)
    return max(age)

In [18]:
oldest_age, time= oldest_person_age(faker_db)
print(oldest_age)

Function oldest_person_age takes average run time of 0.0038962049999940974 for 100 iterations
115.98869244406113


In [19]:
# average age
@timer_factory(100)
def average_age(faker_db):
    """
    Return the average age
    of fake profile db as named tuple
    """
    count = len(faker_db) 
    age = []
    days_in_year = 365.2425 
    today = datetime.date(datetime.today())
    for i in range(count):
        age.append((today - faker_db[i][12]).days / days_in_year)
    return sum(age)/count

In [20]:
avg_age, time = average_age(faker_db)

Function average_age takes average run time of 0.00412065400000074 for 100 iterations


In [21]:
avg_age

58.61777750398718

### 2. Do the same thing above using a dictionary. Prove that namedtuple is faster

In [22]:
# Create a fake profile DB dictonary object
faker_db_dict = {}

In [37]:
for i in range(10000):
    # get a fake profile
    f_prfl = fake.profile()
    
    # prepare next key
    key = 'fk_pr_' + str(i+1)
        
    # Add profile to Fake Profile DB
    faker_db_dict[key] = f_prfl

In [38]:
#  calculate the largest blood type
@timer_factory(100)
def largest_bg_dict(faker_db_dict):
    """
    Return the most common blood group for
    fake profile db as dictionary
    """
    count = len(faker_db_dict) 
    bl_grp = []
    for i in range(count):
        key = 'fk_pr_' + str(i+1)
        bl_grp.append(faker_db_dict[key]['blood_group'])
    return Counter(bl_grp).most_common(1)[0][0]

In [39]:
largest_bg_dict(faker_db_dict)

Function largest_bg_dict takes average run time of 0.00868963699997039 for 100 iterations


('B-', 0.00868963699997039)

In [40]:
# mean-current_location
@timer_factory(100)
def mean_current_location_dict(faker_db_dict):
    """
    Return the mean-current_location
    of fake profile db as dictionary
    """
    count = len(faker_db_dict) 
    lat = []
    long = []
    for i in range(count):
        key = 'fk_pr_' + str(i+1)
        lat.append(faker_db_dict[key]['current_location'][0])
        long.append(faker_db_dict[key]['current_location'][1])
    return sum(lat)/count,sum(long)/count    

In [41]:
lat,long = mean_current_location_dict(faker_db_dict)
print(lat,long)

Function mean_current_location_dict takes average run time of 0.010586135999974431 for 100 iterations
(Decimal('-0.11236560485'), Decimal('1.5045699781')) 0.010586135999974431


In [42]:
# oldest person's age
@timer_factory(100)
def oldest_person_age_dict(faker_db_dict):
    """
    Return the oldest person's age
    of fake profile db as dictionary
    """
    size = len(faker_db_dict) 
    age = []
    days_in_year = 365.2425 
    today = datetime.date(datetime.today())
    for i in range(size):
        key = 'fk_pr_' + str(i+1)
        age.append((today - faker_db_dict[key]['birthdate']).days / days_in_year)
    return max(age)

In [43]:
oldest_person_age_dict(faker_db_dict)

Function oldest_person_age_dict takes average run time of 0.008685003999989931 for 100 iterations


(115.99143035106812, 0.008685003999989931)

In [44]:
# average age
@timer_factory(100)
def average_age_dict(faker_db_dict):
    """
    Return the average age
    of fake profile db as dictionary
    """
    count = len(faker_db_dict) 
    age = []
    days_in_year = 365.2425 
    today = datetime.date(datetime.today())
    for i in range(count):
        key = 'fk_pr_' + str(i+1)
        age.append((today - faker_db_dict[key]['birthdate']).days / days_in_year)
    return sum(age)/count

In [45]:
average_age_dict(faker_db_dict)

Function average_age_dict takes average run time of 0.00851932299994587 for 100 iterations


(57.96942880415096, 0.00851932299994587)

In [52]:
# Compare the Namped Tuple Vs Dict Performance
def compare_time(nt_db, dict_db):
    """
    function to compare the perforamce
    of nt vs dict
    """
    nt_func_list = [largest_bg,mean_current_location,oldest_person_age,average_age]
    nt_timer = 0
    for i in range(len(nt_func_list)):
        _, time = nt_func_list[i](nt_db)
        nt_timer += time
    
    print('\n',"========================")
    
    dict_func_list = [largest_bg_dict,mean_current_location_dict,oldest_person_age_dict,average_age_dict]
    dict_timer = 0
    for i in range(len(dict_func_list)):
        _, time = dict_func_list[i](dict_db)
        dict_timer += time    
    
    print('\n',"========================")
    print(f"{'Named Tuple'if dict_timer > nt_timer else 'Dictionory'} performs {round(dict_timer/nt_timer) if dict_timer > nt_timer else round(nt_timer/dict_timer)} times faster")
    print('\n',"========================")
    
    return(nt_timer, dict_timer)
    
    

In [53]:
compare_time(faker_db, faker_db_dict)

Function largest_bg takes average run time of 0.0025351170000249114 for 100 iterations
Function mean_current_location takes average run time of 0.004901571999971566 for 100 iterations
Function oldest_person_age takes average run time of 0.0033055330000388494 for 100 iterations
Function average_age takes average run time of 0.003617405999957555 for 100 iterations

Function largest_bg_dict takes average run time of 0.008090984000027675 for 100 iterations
Function mean_current_location_dict takes average run time of 0.011033325000016702 for 100 iterations
Function oldest_person_age_dict takes average run time of 0.008602530000043771 for 100 iterations
Function average_age_dict takes average run time of 0.009198861000013494 for 100 iterations

Named Tuple performs 3 times faster



(0.014359627999992882, 0.03692570000010164)

### 3. Create a fake data (you can use Faker for company names) for imaginary stock exchange for top 100 companies (name, symbol, open, high, close). Assign a random weight to all the companies. Calculate and show what value stock market started at, what was the highest value during the day and where did it end. Make sure your open, high, close are not totally random.

In [36]:
from operator import itemgetter

In [33]:
key=itemgetter(faker_db)

In [29]:
key

operator.itemgetter(FakePrfDb(FakePrf_0=FakePrf(job='Best boy', company='Hoffman PLC', ssn='792-19-9515', residence='584 Delacruz Route\nPort Nicole, PA 30307', current_location=(Decimal('12.707898'), Decimal('-0.223412')), blood_group='O-', website=['https://williams.org/', 'http://cox-vance.info/', 'http://wiley.org/', 'https://www.ramirez.com/'], username='davidtucker', name='Amanda Brown', sex='F', address='330 Webb Mission\nSouth Rebeccafurt, HI 57833', mail='espinozascott@hotmail.com', birthdate=datetime.date(1959, 4, 17)), FakePrf_1=FakePrf(job='Radiographer, diagnostic', company='Reed, Davis and Kemp', ssn='113-44-6344', residence='8254 Michael Rest Suite 002\nWatersland, IL 93406', current_location=(Decimal('73.348788'), Decimal('-138.714095')), blood_group='O+', website=['http://wells.com/', 'https://www.owens.biz/', 'http://jones.org/'], username='harrisluis', name='Jane Torres', sex='F', address='3798 Turner Wall\nLake Matthew, WI 16348', mail='swyatt@gmail.com', birthdate=

In [34]:
a = iter(faker_db)

In [35]:
next(a)[12]

datetime.date(1959, 4, 17)

In [36]:
min(faker_db, key=itemgetter(0))

FakePrf(job='Accountant, chartered public finance', company='Hatfield-Chambers', ssn='864-68-0751', residence='0318 Gonzalez Meadow Suite 631\nPort Kimberlymouth, CA 56788', current_location=(Decimal('-30.4244755'), Decimal('-103.687693')), blood_group='O-', website=['http://www.moses-king.com/', 'https://gilbert.com/', 'https://rhodes.info/', 'https://www.powell.biz/'], username='matthewwhite', name='Alexandra Green', sex='F', address='4126 Green Radial Suite 019\nByrdstad, CO 79487', mail='cynthiaharris@yahoo.com', birthdate=datetime.date(1931, 7, 2))

In [37]:
min=sorted(faker_db, key=next( iter(faker_db))[12])

TypeError: 'datetime.date' object is not callable

In [72]:
next( iter(faker_db))[12]

datetime.date(1915, 8, 11)

In [38]:
[x[12] for x in faker_db]

[datetime.date(1959, 4, 17),
 datetime.date(1966, 2, 18),
 datetime.date(1931, 7, 2),
 datetime.date(1977, 1, 26),
 datetime.date(1927, 10, 4)]

In [23]:
today = datetime.date(datetime.today())

In [24]:
[(today-x[12]).days/365 for x in faker_db]

[61.49315068493151,
 54.64657534246575,
 89.30410958904109,
 43.701369863013696,
 93.04931506849314]

In [25]:
lambda x: min((today-x[12]).days/365 for x in faker_db)

<function __main__.<lambda>(x)>

In [39]:
min((today-x[12]).days/365 for x in faker_db)

43.701369863013696

In [40]:
min(faker_db, key=itemgetter(1))

FakePrf(job='Building surveyor', company='Davis Ltd', ssn='593-79-4880', residence='6708 Cynthia Pines\nTimport, MO 50201', current_location=(Decimal('8.2763605'), Decimal('61.547507')), blood_group='A-', website=['http://www.jones.com/'], username='millerjacob', name='Jenna Smith', sex='F', address='4142 Angela Ranch\nLake Scottton, WI 65598', mail='michael76@yahoo.com', birthdate=datetime.date(1927, 10, 4))

In [45]:

itemgetter(i)(faker_db)[12]

datetime.date(1927, 10, 4)

In [1]:
a =[('a', 2), ('ee', 3), ('mm', 4), ('x', 1)]

In [7]:
min(a, key=itemgetter(1))

('x', 1)

In [3]:
min(a, key=lambda t: t[1])

('x', 1)

In [46]:
i

4