## Imports

In [2]:
# Standard Library Imports
import datetime
import random
from string import ascii_letters
from time import perf_counter
from operator import truediv, mul, add
from collections import namedtuple, Counter
from typing import NamedTuple

# Third-Party Imports
!pip install faker
from faker import Faker

Collecting faker
[?25l  Downloading https://files.pythonhosted.org/packages/e4/be/d03e305f1d879f541c23d586023ec11b124d6cc3d75a95159470754134d9/Faker-8.10.0-py3-none-any.whl (1.2MB)
[K     |▎                               | 10kB 11.2MB/s eta 0:00:01[K     |▋                               | 20kB 15.7MB/s eta 0:00:01[K     |▉                               | 30kB 19.7MB/s eta 0:00:01[K     |█▏                              | 40kB 22.0MB/s eta 0:00:01[K     |█▍                              | 51kB 19.1MB/s eta 0:00:01[K     |█▊                              | 61kB 12.8MB/s eta 0:00:01[K     |██                              | 71kB 13.0MB/s eta 0:00:01[K     |██▎                             | 81kB 14.1MB/s eta 0:00:01[K     |██▌                             | 92kB 14.2MB/s eta 0:00:01[K     |██▉                             | 102kB 14.2MB/s eta 0:00:01[K     |███                             | 112kB 14.2MB/s eta 0:00:01[K     |███▍                            | 122kB 14.2MB/

In [3]:
# Seed every random source the notebook draws from so generated data is repeatable.
# Faker.seed() seeds Faker's own shared generator; the stock-data cells call the
# standard `random` module directly, so that module is seeded separately.
fake = Faker()
Faker.seed(0)
random.seed(0)

In [4]:
def is_namedtuple_instance(instance) -> bool:
    """
    Tell whether an object looks like an instance of a namedtuple class
    :param instance: object to inspect
    :return: True when the object's class has tuple as its one and only direct base and
    exposes a ``_fields`` tuple made up solely of strings, otherwise False
    """
    cls = type(instance)

    # A namedtuple class derives directly from tuple and from nothing else
    if cls.__bases__ != (tuple,):
        return False

    # namedtuple classes publish their field names as a tuple of strings in ``_fields``
    field_names = getattr(cls, '_fields', None)
    if isinstance(field_names, tuple):
        for name in field_names:
            if type(name) != str:
                return False
        return True

    return False

In [5]:
def generate_profiles(number_of_samples) -> list:
    """
    Function to generate profiles using faker library
    :param number_of_samples: Number of profiles that need to be generated
    :return: List of dictionaries (generated profiles), one entry per requested sample
    """
    print("Generating Profiles ....")
    # Build the list directly: a comprehension run only for its append() side effect
    # also materialises a throw-away list of None values.
    return [fake.profile() for _ in range(number_of_samples)]


In [6]:
def dictionary_operations(_list_of_dictionaries: list) -> dict:
    """
    Function to calculate the blood type count, mean location, oldest person age, and average age of all the
    profiles from the sample profiles.
    :param _list_of_dictionaries: non-empty list of generated profiles, each of dictionary type. The keys
    "blood_group", "current_location" and "birthdate" are read when present; "name" is read for the oldest
    profile(s)
    :return: dictionary with keys 'blood_count' (Counter of blood groups), 'mean_location' (x, y),
    'name_age_of_oldest_person' (name -> age in days for every profile sharing the oldest age) and
    'average_age_of_profiles' (in days)
    :raises TypeError: if the input is not a list or any element of it is not a dictionary
    :raises ValueError: if the input list is empty
    """
    # Validate the container first so the rest of the function can assume a non-empty list
    if not isinstance(_list_of_dictionaries, list):
        raise TypeError(f"Expected input data is list but received {type(_list_of_dictionaries)}")
    if len(_list_of_dictionaries) == 0:
        raise ValueError("Enter Valid data. Empty list passed to the function")

    # Collectors for the data used in the calculations
    _blood_group = []
    x_location = []
    y_location = []
    _days = dict()

    # Use a single reference date so every age is measured against the same day
    today = datetime.date.today()

    for index, profile in enumerate(_list_of_dictionaries):
        # Check every element (not only the first) so a stray non-dict is reported, not silently skipped
        if not isinstance(profile, dict):
            raise TypeError(f"Enter correct type of data. Data passed is {type(profile)} and expected "
                            f"data is dictionary")

        # Update the blood group data
        if "blood_group" in profile:
            _blood_group.append(profile["blood_group"])

        # Collect the current location for the mean calculation
        if "current_location" in profile:
            location = profile["current_location"]
            x_location.append(location[0])
            y_location.append(location[1])

        # Age of the person in days, keyed by position in the input list
        if "birthdate" in profile:
            _days[index] = (today - profile["birthdate"]).days

    # Compute the maximum once after the loop; doing it per profile made the function quadratic
    oldest_days = max(_days.values()) if _days else None

    # Names of all the persons sharing the oldest age in days
    oldest_people = {_list_of_dictionaries[index]["name"]: oldest_days
                     for index, days in _days.items() if days == oldest_days}

    _blood_group_count = Counter(_blood_group)

    # Means are taken over the total number of profiles
    number_of_profiles = len(_list_of_dictionaries)
    x_mean = truediv(sum(x_location), number_of_profiles)
    y_mean = truediv(sum(y_location), number_of_profiles)

    # Average age
    avg_age = truediv(sum(_days.values()), number_of_profiles)

    return {'blood_count': _blood_group_count,
            'mean_location': (x_mean, y_mean),
            'name_age_of_oldest_person': oldest_people,
            'average_age_of_profiles': avg_age}

In [7]:
def namedtuple_operations(list_of_tuples) -> NamedTuple:
    """
    Function to perform operations of namedtuple
    :param list_of_tuples: non-empty list of generated profiles in namedtuple datatype; each profile is read
    through its blood_group, current_location, birthdate and name fields
    :return: namedtuple of blood_group_count, mean_location, name_of_oldest_person (list of names sharing the
    oldest age), age (oldest age in days) and average_age (in days) of all profiles
    :raises TypeError: if the input is not a list or its first element is not a namedtuple instance
    :raises ValueError: if the input list is empty
    """
    # Validate the container type before calling len() or indexing it, so that inputs which are not
    # sized or subscriptable are reported with the intended message.
    if not isinstance(list_of_tuples, list):
        raise TypeError(f"Expected input data is list but received {type(list_of_tuples)}")
    if len(list_of_tuples) == 0:
        raise ValueError("Enter Valid data. Empty list passed to the function")
    if not is_namedtuple_instance(list_of_tuples[0]):
        raise TypeError(f"Enter correct type of data. Data passed is {type(list_of_tuples[0])} and expected "
                        f"data is namedtuple")

    # Define and add docstring to the output namedtuple
    Output = namedtuple('Output', "blood_group_count mean_location name_of_oldest_person age average_age")
    Output.__doc__ = "NamedTuple for the output of the calculations"
    Output.blood_group_count.__doc__ = "Total number of individuals having respective blood group in the dataset"
    Output.mean_location.__doc__ = "Mean location of all the individuals in the profiles"
    Output.name_of_oldest_person.__doc__ = "Name of the oldest individual in the profiles"
    Output.age.__doc__ = "Age of the oldest individual in days"
    Output.average_age.__doc__ = "Average age of all the individuals in the profiles"

    # Variables for calculations
    _blood_list = []
    x_data = []
    y_data = []
    _days = []

    # Use a single reference date so every age is measured against the same day
    today = datetime.date.today()

    for _tuple in list_of_tuples:
        # Append data to a list to calculate the blood group count
        _blood_list.append(_tuple.blood_group)

        # Append current location (x, y) to the lists for mean calculations
        x_data.append(_tuple.current_location[0])
        y_data.append(_tuple.current_location[1])

        # Age of the person in days
        _days.append((today - _tuple.birthdate).days)

    # Count of each blood group
    _blood_group_count = Counter(_blood_list)

    # Calculations for mean location
    number_of_profiles = len(list_of_tuples)
    x_mean = truediv(sum(x_data), number_of_profiles)
    y_mean = truediv(sum(y_data), number_of_profiles)

    # Names of every person sharing the oldest age
    oldest_days = max(_days)
    oldest_person_names = [profile.name for profile, age in zip(list_of_tuples, _days) if age == oldest_days]

    # Average age calculations
    avg_age = truediv(sum(_days), number_of_profiles)

    # Create an instance of output namedtuple to return the calculated data
    return Output(blood_group_count=_blood_group_count,
                  mean_location=(x_mean, y_mean),
                  name_of_oldest_person=oldest_person_names,
                  age=oldest_days,
                  average_age=avg_age)

In [8]:
def compare_namedtuple_and_dictionaries(number_of_profiles: int = 10_000) -> None:
    """
    Function to compare the performance of namedtuples and dictionaries over the same generated profiles
    (10K by default) for the same data output. Results and timings are printed.
    :param number_of_profiles: number of profiles to generate for the comparison
    :return: None
    """
    # Generate the profiles
    list_of_dictionaries = generate_profiles(number_of_profiles)

    # Perform operations on the dictionary
    start = perf_counter()
    dictionary_output = dictionary_operations(list_of_dictionaries)
    elapsed_dict = perf_counter() - start
    print("Output: ", dictionary_output)
    print("Operation Time on Dictionary: ", elapsed_dict)
    print()

    # Sample profile to create named tuple fields; the first one is used so a single profile is enough
    sample_profile = list_of_dictionaries[0]

    # Create and add docstring to the namedtuple profile
    PersonProfile = namedtuple('PersonProfile', sorted(sample_profile))
    PersonProfile.__doc__ = "Profile of an individual containing data associated with that individual"
    field_docs = {
        'address': "Address of an individual in String format",
        'birthdate': "Date of birth of an individual in datetime.date(year, month, day) format",
        'blood_group': "Blood group of an individual in String format",
        'company': "Name of a company with which an individual is associated with in String format",
        'current_location': "Current location of an individual in tuple of Decimals format",
        'job': "Job of an individual in String format",
        'mail': "Email address of an individual in String format",
        'name': "Name of an individual in String format",
        'residence': "Residence of an individual in String format",
        'sex': "Gender of an individual in String format",
        'ssn': "Social Security Number of an individual in String format",
        'username': "Username of an individual in String format",
        'website': "Websites of an individual in list of String format",
    }
    for field_name, field_doc in field_docs.items():
        getattr(PersonProfile, field_name).__doc__ = field_doc

    # Create a list of named tuples, one per generated profile
    list_of_named_tuples = [PersonProfile(**profile) for profile in list_of_dictionaries]
    print(list_of_named_tuples[0])
    print()

    # Perform same operations on the namedtuple
    start = perf_counter()
    namedtuple_output = namedtuple_operations(list_of_named_tuples)
    elapsed_named_tuple = perf_counter() - start
    print("Output: ", namedtuple_output)
    print("Operation Time on NamedTuples: ", elapsed_named_tuple)

    print()
    print(f"Tuples are faster than dictionaries by {elapsed_dict / elapsed_named_tuple} times")

In [9]:
# Part one: run the named-tuple vs. dictionary performance comparison.
# The call prints both calculation results and the time each approach took.
compare_namedtuple_and_dictionaries()

Generating Profiles ....
Output:  {'blood_count': Counter({'AB+': 1300, 'O-': 1284, 'B+': 1274, 'O+': 1251, 'A+': 1249, 'B-': 1230, 'AB-': 1216, 'A-': 1196}), 'mean_location': (Decimal('0.4583500264'), Decimal('-0.5367035854')), 'name_age_of_oldest_person': {'Jay Montes': 42363}, 'average_age_of_profiles': 21306.7393}
Operation Time on Dictionary:  1.1294328550000046

PersonProfile(address='711 Golden Overpass\nWest Andreaville, MA 71317', birthdate=datetime.date(1944, 9, 2), blood_group='B+', company='Williams-Sheppard', current_location=(Decimal('52.958961'), Decimal('143.143712')), job='Musician', mail='tamaramorrison@hotmail.com', name='Gary Cross', residence='Unit 5938 Box 2421\nDPO AP 33335', sex='M', ssn='498-52-4970', username='leeashley', website=['http://www.rivera.com/', 'http://grimes-green.net/', 'http://www.larsen.com/'])

Output:  Output(blood_group_count=Counter({'AB+': 1300, 'O-': 1284, 'B+': 1274, 'O+': 1251, 'A+': 1249, 'B-': 1230, 'AB-': 1216, 'A-': 1196}), mean_loc

In [11]:
def generate_price(share_price, percentage_change, high=False):
    """
    Produce a share price derived from a reference price
    :param share_price: reference price of the share
    :param percentage_change: maximum percentage by which the price may move
    :param high: when True, return the upper bound (reference price plus the full change)
    :return: the upper bound if ``high`` is set, otherwise a random integer drawn between the
    truncated lower and upper bounds (both inclusive)
    """
    # Absolute size of the allowed move around the reference price
    swing = (percentage_change / 100) * share_price
    upper_bound = share_price + swing

    if not high:
        lower_bound = share_price - swing
        return random.randint(int(lower_bound), int(upper_bound))

    return upper_bound

In [12]:
def generate_stock_data(number_of_companies):
    """
    Function to generate random stock data for the requested number of companies
    :param number_of_companies: number of companies for which data is generated
    :return: tuple of three items - the list of CompanyStock namedtuples (fields: name, symbol, open, high,
    close, market_cap, company_weight), the sum of all generated market caps, and the list of company symbols
    """
    _list_of_companies = []
    market_percentage_fluctuation = 5
    share_min_price = 100
    share_max_price = 2000
    symbol_length = 3
    _list_of_market_cap = []
    _list_of_company_symbol = []

    CompanyStock = namedtuple('CompanyStock', "name symbol open high close market_cap company_weight",
                              defaults=[None] * 7)
    CompanyStock.name.__doc__ = "Name of the company"
    CompanyStock.symbol.__doc__ = "Symbol of the company"
    CompanyStock.open.__doc__ = "Opening share price of the company"
    CompanyStock.high.__doc__ = "Highest share price of the company"
    CompanyStock.close.__doc__ = "Closing share price of the company"
    CompanyStock.market_cap.__doc__ = "Market capital of the company"
    CompanyStock.company_weight.__doc__ = "Weight of the company on stock exchange"

    for _ in range(number_of_companies):
        # Generate company name
        company_name = fake.company()

        # Generate company symbol from the first characters of the name, swapping spaces and commas
        # for random letters. Upper-casing happens last because ascii_letters also contains lowercase
        # letters, which would otherwise leak into the symbol.
        company_symbol = company_name[0:symbol_length]
        company_symbol = company_symbol.replace(' ', random.choice(ascii_letters))
        company_symbol = company_symbol.replace(',', random.choice(ascii_letters))
        company_symbol = company_symbol.upper()
        _list_of_company_symbol.append(company_symbol)

        # Generate a random market_cap for the company
        company_market_cap = random.randint(1_000_000, 1_000_000_000)
        _list_of_market_cap.append(company_market_cap)

        # Opening price of company's stock
        company_open_price = generate_price(random.randint(share_min_price, share_max_price),
                                            random.randint(0, market_percentage_fluctuation))

        # High price of company's stock
        company_high_price = generate_price(company_open_price, market_percentage_fluctuation, high=True)

        # Closing price of company's stock
        company_close_price = generate_price(company_open_price, random.randint(0, market_percentage_fluctuation))

        # The weight is filled in below, once the total market value is known
        company = CompanyStock(name=company_name, symbol=company_symbol, open=company_open_price,
                               high=company_high_price, close=company_close_price, market_cap=company_market_cap,
                               company_weight=None)

        # Append the stock data in namedtuple format
        _list_of_companies.append(company)

    # Weight of each company is its share of the total market cap
    _opening_market_value = sum(_list_of_market_cap)
    _list_of_companies = [company._replace(company_weight=(company.market_cap / _opening_market_value))
                          for company in _list_of_companies]

    return _list_of_companies, _opening_market_value, _list_of_company_symbol

In [13]:
def stock_market_():
    """
    Generate one instance of the market and calculate the change in the market points.
    Each company's market cap is moved by a random whole percentage between -10 and +10, and the
    opening value, the new per-company values, the current value and the change are printed.
    :return: None
    """
    # Generate the stock data in namedtuple format for 100 companies (the symbol list is not needed here)
    list_of_companies, opening_market_value, _ = generate_stock_data(100)

    # New value of every company after a random move of -10% .. +10% of its market cap
    _new_market_value = [
        _company.market_cap + _company.market_cap * random.randint(-10, 10) / 100
        for _company in list_of_companies
    ]

    print(f"Opening Market Value: {opening_market_value}")
    print(f"New Values: {_new_market_value}")
    current_market_value = sum(_new_market_value)
    print(f"Current Market Value: {current_market_value}")

    # Express the relative change as a percentage before adding it to the base of 100; adding the raw
    # fraction to 100 mixed two different scales.
    # NOTE(review): assumes the market opens at a base of 100 points - confirm the intended convention.
    market_change_in_points = (current_market_value - opening_market_value) / opening_market_value * 100
    print(f"Change: {100 + market_change_in_points}")

In [14]:
# Part two: generate random stock data for one market instance.
# The call prints the opening market value, the new company values, the current value and the change.
stock_market_()

Opening Market Value: 47239371634
New Values: [532998850.32, 65998289.41, 179308741.81, 601431431.5, 742490449.61, 217376248.32, 760343242.32, 50531451.6, 37752667.4, 254543717.68, 785061584.8, 396451233.6, 812964640.5, 610012726.29, 916183324.46, 161378485.92, 838896193.15, 725054008.7, 428408576.22, 119467089.98, 164251540.0, 489020447.7, 540954987.63, 587299264.2, 817979424.18, 391327699.5, 961220296.67, 752520644.16, 456004636.38, 243408756.8, 889161186.87, 886836793.5, 807456306.24, 734161680.0, 64929151.28, 940001559.1, 230407019.7, 751812063.75, 105429794.26, 858999329.0, 144613743.6, 780710837.94, 203361302.52, 469083099.84, 285429638.62, 335327986.4, 980655428.55, 433696559.65999997, 487649765.08, 67962346.2, 548203098.35, 373922129.9, 8402584.48, 445380169.43, 525490765.3, 527613492.7, 372748115.52, 71081991.75, 216463503.64, 228500563.2, 363006207.0, 764311228.25, 996198621.3, 233883315.36, 115601927.0, 708457226.88, 768829171.67, 702404582.66, 445355469.9, 725267671.74, 152