In [1]:
!pip install faker



In [24]:
from collections import namedtuple, Counter
import datetime
import re
import random
import string
from time import perf_counter
from functools import wraps
from faker import Faker

In [23]:
# Single shared Faker generator. The profile and company generator functions in
# this notebook read it as a module-level global rather than taking it as an argument.
# NOTE(review): no seed is set in the visible cells, so generated data differs per run.
faker = Faker()

In [4]:
def timed(fn: "Function"):
    """
    Wrap ``fn`` so that each call reports how long it took.

    The wrapper forwards every positional and keyword argument to ``fn``,
    measures the call with ``perf_counter`` and prints the elapsed seconds
    (four decimal places) before handing back ``fn``'s own return value.
    If ``fn`` raises, the exception propagates and nothing is printed.
    """
    render = 'Run time: {0:.4f}s'.format

    @wraps(fn)
    def _timed_call(*args, **kwargs):
        began = perf_counter()
        outcome = fn(*args, **kwargs)
        finished = perf_counter()
        print(render(finished - began))
        return outcome

    return _timed_call

In [15]:
def generate_fake_profiles_np(num_profiles: int):
  """
  Build ``num_profiles`` fake user profiles, each stored as a namedtuple.

  One extra profile is drawn up front purely to learn the field names for the
  ``CreateProfiles`` namedtuple class; it is not part of the result. Every
  returned entry comes from its own ``faker.profile()`` call.

  Returns a list of ``CreateProfiles`` instances (empty when
  ``num_profiles`` is 0).
  """
  field_spec = " ".join(faker.profile())
  CreateProfiles = namedtuple('CreateProfiles', field_spec)
  return [CreateProfiles(**faker.profile()) for _ in range(num_profiles)]

In [16]:
@timed
def calculate_time_np(num_profiles: int = 10000):
  """
  Benchmark: generate fake profiles as namedtuples and summarise them.

  Because the function is wrapped in ``@timed``, the printed run time covers
  both the profile generation and the aggregation loop.

  Args:
    num_profiles: number of profiles to generate and aggregate
      (defaults to 10000).

  Returns:
    A ``data`` namedtuple with the fields
      largest_blood_type: (blood group, count) of the most frequent group;
        on a tie the group seen first wins.
      mean_current_location: per-coordinate mean of ``current_location``.
      oldest_person: (profile, age in whole years) of the oldest profile.
      average_age: mean of the per-profile ages in whole years, truncated.
    Ages in years are approximated as days / 365 (leap days are ignored).

  Raises:
    ValueError: if ``num_profiles`` is not positive, since the means would
      otherwise divide by zero.
  """
  if num_profiles <= 0:
    raise ValueError('num_profiles must be a positive integer')

  fake_profiles = generate_fake_profiles_np(num_profiles)
  date_today = datetime.date.today()
  blood_group = Counter()
  oldest_profile, oldest_age_days = None, None
  cur_loc_coord_sum = [0, 0]
  sum_ages = 0
  for profile in fake_profiles:
    blood_group[profile.blood_group] += 1
    age_days = (date_today - profile.birthdate).days
    # Seeding from the first profile guarantees a result even when every age
    # is 0 days; the strict '>' keeps the earliest profile on ties.
    if oldest_age_days is None or age_days > oldest_age_days:
      oldest_profile, oldest_age_days = profile, age_days
    cur_loc_coord_sum[0] += profile.current_location[0]
    cur_loc_coord_sum[1] += profile.current_location[1]
    sum_ages += int(age_days / 365)

  data = namedtuple('data', 'largest_blood_type mean_current_location oldest_person average_age')
  bg_l = max(blood_group, key=blood_group.get)
  mean_current_location = (cur_loc_coord_sum[0] / num_profiles, cur_loc_coord_sum[1] / num_profiles)
  return data(
    (bg_l, blood_group[bg_l]),
    mean_current_location,
    (oldest_profile, int(oldest_age_days / 365)),
    int(sum_ages / num_profiles),
  )

In [17]:
# Run the namedtuple benchmark; the "Run time" line in the output is printed by
# the @timed decorator and the displayed value is the function's summary result.
calculate_time_np()

Run time: 47.0934s


data(largest_blood_type=('AB+', 1296), mean_current_location=(Decimal('1.04821986615'), Decimal('-0.4176551301')), oldest_person=(CreateProfiles(job='Chartered legal executive (England and Wales)', company='Holmes Inc', ssn='853-66-8060', residence='53432 Smith Summit Suite 995\nMelissaton, MT 64452', current_location=(Decimal('-51.8753525'), Decimal('-116.224890')), blood_group='B+', website=['https://www.payne-sanchez.com/', 'https://www.edwards-gonzalez.biz/', 'http://www.evans.com/', 'http://www.jones-gomez.info/'], username='osteele', name='Joshua Owens', sex='M', address='40104 David Expressway Apt. 304\nEast Victoriahaven, MS 86830', mail='ewilliams@gmail.com', birthdate=datetime.date(1904, 10, 14)), 116), average_age=57)

In [18]:
def generate_fake_profiles_dict(num_profiles):
  """
  Build ``num_profiles`` fake user profiles, each kept as the plain
  dictionary returned by ``faker.profile()``.

  Returns a list of those dictionaries (empty when ``num_profiles`` is 0).
  """
  return [faker.profile() for _ in range(num_profiles)]

In [20]:
@timed
def calculate_time_dict(num_profiles: int = 10000):
  """
  Benchmark: generate fake profiles as dictionaries and summarise them.

  Because the function is wrapped in ``@timed``, the printed run time covers
  both the profile generation and the aggregation loop.

  Args:
    num_profiles: number of profiles to generate and aggregate
      (defaults to 10000).

  Returns:
    A dictionary with the keys
      'largest_blood_type': (blood group, count) of the most frequent group;
        on a tie the group seen first wins.
      'mean_current_location': per-coordinate mean of 'current_location'.
      'oldest_person': (profile dict, age in whole years) of the oldest profile.
      'average_age': mean of the per-profile ages in whole years, truncated.
    Ages in years are approximated as days / 365 (leap days are ignored).

  Raises:
    ValueError: if ``num_profiles`` is not positive, since the means would
      otherwise divide by zero.
  """
  if num_profiles <= 0:
    raise ValueError('num_profiles must be a positive integer')

  fake_profiles = generate_fake_profiles_dict(num_profiles)
  date_today = datetime.date.today()
  blood_group = Counter()
  oldest_profile, oldest_age_days = None, None
  cur_loc_coord_sum = [0, 0]
  sum_ages = 0
  for profile in fake_profiles:
    blood_group[profile['blood_group']] += 1
    age_days = (date_today - profile['birthdate']).days
    # Seeding from the first profile guarantees a result even when every age
    # is 0 days; the strict '>' keeps the earliest profile on ties.
    if oldest_age_days is None or age_days > oldest_age_days:
      oldest_profile, oldest_age_days = profile, age_days
    cur_loc_coord_sum[0] += profile['current_location'][0]
    cur_loc_coord_sum[1] += profile['current_location'][1]
    sum_ages += int(age_days / 365)

  bg_l = max(blood_group, key=blood_group.get)
  mean_current_location = (cur_loc_coord_sum[0] / num_profiles, cur_loc_coord_sum[1] / num_profiles)
  return {
    'largest_blood_type': (bg_l, blood_group[bg_l]),
    'mean_current_location': mean_current_location,
    'oldest_person': (oldest_profile, int(oldest_age_days / 365)),
    'average_age': int(sum_ages / num_profiles),
  }

In [21]:
# Run the dictionary benchmark; the "Run time" line in the output is printed by
# the @timed decorator and the displayed value is the function's summary result.
calculate_time_dict()

Run time: 45.1540s


{'largest_blood_type': ('B-', 1287),
 'mean_current_location': (Decimal('-0.91941042465'), Decimal('1.073366058')),
 'oldest_person': ({'job': 'Production engineer',
   'company': 'Barnett, Caldwell and Ross',
   'ssn': '801-72-2356',
   'residence': '14810 Lisa Shoal Apt. 668\nRaymondtown, OR 78887',
   'current_location': (Decimal('22.5153275'), Decimal('115.929224')),
   'blood_group': 'A+',
   'website': ['http://www.ryan.com/',
    'https://www.hall.info/',
    'http://www.green-diaz.com/',
    'http://www.rice-richardson.net/'],
   'username': 'aaronrobinson',
   'name': 'Jessica Larsen',
   'sex': 'F',
   'address': '321 Nguyen Fords Suite 404\nLake Evanbury, MI 18019',
   'mail': 'cherylhoffman@hotmail.com',
   'birthdate': datetime.date(1904, 10, 14)},
  116),
 'average_age': 57}

In [29]:
def generate_companies(num_companies: int):
  """
  Create ``num_companies`` fake listed companies for the index simulation.

  Each company is a ``Company`` namedtuple with the fields
    Name:   company name from ``faker.company()``.
    Symbol: lower-cased initials of the name's words, made unique within the
            returned list by appending random lowercase letters on collision.
    Open:   random opening price (an integer in 1500..5000 nudged upward by
            a factor in 1.0001..1.0002, rounded to 2 places).
    High:   Open scaled by a factor in 1.0..1.5, never below Open.
    Close:  Open scaled by a factor in 0.8..1.75, capped at High.
    Weight: the company's share in the index; weights are normalised to sum
            to about 1 (each is rounded to 5 places, so the total can be off
            by a few 1e-5).

  Returns a list of ``Company`` namedtuples (empty when ``num_companies`` is 0).
  """
  Company = namedtuple('Company', 'Name Symbol Open High Close Weight')

  raw_weights = [round(random.random(), 5) for _ in range(num_companies)]
  total_weight = sum(raw_weights)
  if total_weight:
    weights = [round(w / total_weight, 5) for w in raw_weights]
  else:
    # No companies, or every draw rounded to 0.0: fall back to equal weights
    # instead of dividing by zero.
    weights = [round(1 / len(raw_weights), 5)] * len(raw_weights) if raw_weights else []

  used_symbols = set()
  companies = []
  for weight in weights:
    co_name = faker.company()
    # Splitting can yield empty pieces (e.g. a name ending in '.'); skip them
    # so that taking the first character cannot fail.
    words = re.split('[,. ]+', co_name.replace("-", " "))
    symbol = ''.join(word[0] for word in words if word).lower()
    # Grow the suffix on every collision. The candidate eventually becomes
    # longer than any symbol already taken, so this always terminates, even
    # when many companies share the same initials.
    while symbol in used_symbols:
      symbol += random.choice(string.ascii_lowercase)
    used_symbols.add(symbol)

    open_price = round(random.randint(1500, 5000) * random.uniform(1.0001, 1.0002), 2)
    high = max(open_price, round(open_price * random.uniform(1.0, 1.5), 2))
    close = min(high, round(open_price * random.uniform(0.8, 1.75), 2))
    companies.append(Company(co_name, symbol, open_price, high, close, weight))
  return companies

In [30]:
# Build a sample list of 100 companies so its contents can be inspected.
companies = generate_companies(100)

In [28]:
# Display the generated companies.
# NOTE(review): the saved output of this cell looks stale — it lists Open values
# below 1500, which the current generate_companies cannot produce, and its
# execution count is lower than the preceding cell's. Re-run to refresh it.
companies

[Company(Name='Perez Group', Symbol='pg', Open=544.09, High=619.77, Close=518.63, Weight=0.0166),
 Company(Name='Walker and Sons', Symbol='was', Open=1933.33, High=1933.33, Close=1933.33, Weight=0.00615),
 Company(Name='Le Ltd', Symbol='ll', Open=2243.29, High=2270.2, Close=1868.02, Weight=0.00386),
 Company(Name='Johnson LLC', Symbol='jl', Open=1366.18, High=1366.18, Close=1106.68, Weight=0.00848),
 Company(Name='Wood-Joseph', Symbol='wj', Open=2665.49, High=2807.36, Close=2668.92, Weight=0.00072),
 Company(Name='Martin Group', Symbol='mg', Open=3607.49, High=3607.49, Close=3033.84, Weight=0.00608),
 Company(Name='Fletcher, Stephens and Williams', Symbol='fsaw', Open=542.07, High=542.07, Close=498.78, Weight=0.0115),
 Company(Name='Barton PLC', Symbol='bp', Open=2377.46, High=2585.81, Close=2340.95, Weight=0.01453),
 Company(Name='Parker-Chase', Symbol='pc', Open=3422.51, High=3858.72, Close=3526.51, Weight=0.01051),
 Company(Name='Smith PLC', Symbol='sp', Open=890.11, High=1013.45, C

In [33]:
def get_index(num_companies: int) -> "namedtuple":
  """
  Simulate a small stock exchange and report its weighted index.

  A fresh set of ``num_companies`` companies is generated; the index open,
  high and close are the sums of each company's corresponding price times
  its weight, rounded to two decimals.

  input: num_companies, number of stocks in the exchange
  output: namedtuple('INDEX', 'Index_Open Index_High Index_Close')
  """
  listed = generate_companies(num_companies)

  def weighted_total(price_field):
    # Weight-scaled sum of one price column across all listed companies.
    contributions = [getattr(stock, price_field) * stock.Weight for stock in listed]
    return round(sum(contributions), 2)

  INDEX = namedtuple('INDEX', 'Index_Open Index_High Index_Close')
  return INDEX(
    weighted_total('Open'),
    weighted_total('High'),
    weighted_total('Close'),
  )

In [34]:
# Compute the weighted index for a 100-company exchange. get_index generates its
# own companies internally, so the result is independent of the `companies` list above.
get_index(100)

INDEX(Index_Open=3435.85, Index_High=4307.94, Index_Close=3792.92)