In [1]:
import datetime
import os 
import random

from random import randint

import numpy as np
import pandas as pd

from faker import Faker

In [2]:
# Shared Faker instance the schema classes use to generate names,
# companies and addresses.
fake = Faker()

# Vocabularies that the Role and Team schemas sample from.
titles = ['sales_rep','sales_leader']
team_types = ['smb_sales', 'enterprise_sales']
locales = ['NAMER','EMEA','APAC']

# Every table in this notebook is written under db/. Create the folder up
# front so a fresh checkout does not fail on the first to_csv call.
os.makedirs('db', exist_ok=True)

In [3]:
class Base():
    """
    Base class with shared utilities
    """
    def __init__(self, *args, **kwargs):
        pass

    def random_iso_date(self,
                        start=datetime.date(year=2019, month=1, day=1),
                        end=None):
        """
        Return a random date between start and end (inclusive) as an ISO string

        start -- earliest date that can be returned
        end   -- latest date that can be returned; defaults to today, resolved
                 on every call (a date.today() default in the signature is
                 evaluated only once, when the class is defined)

        For date inputs, raises ValueError when end is earlier than start.
        """
        if end is None:
            end = datetime.date.today()
        max_td = end - start
        # total_seconds() returns a float, but randint only accepts integers
        # (Python 3.12+ raises TypeError for a float bound).
        rand_interval = randint(0, int(max_td.total_seconds()))
        # For date objects only the whole-day part of the offset is applied.
        date = start + datetime.timedelta(seconds=rand_interval)
        return date.isoformat()

    def as_dict(self):
        """
        Return the instance's attribute dict (the live __dict__, not a copy)
        """
        return vars(self)

In [4]:
class Role(Base):
    """
    Schema for a User's Role
    We'll only ever have as many distinct roles as we have titles
    """
    def __init__(self):
        # Pick the title once and derive the id from it. Drawing two
        # independent random titles lets id and title disagree.
        title = random.choice(titles)
        self.id = titles.index(title) + 1 # make sure we don't use 0 for id
        self.monthly_quota = self.get_monthly_quota()
        self.title = title

    def get_monthly_quota(self):
        """
        Return a random monthly quota as a float in [0, 50000)
        """
        # NOTE(review): the quota is drawn per instance, so two Role objects
        # with the same id can carry different quotas.
        return random.random() * 50000

In [5]:
class Team(Base):
    """
    Schema for a Team. Users are members of a team
    We'll only ever have as many distinct teams as we have types
    Currently, the only types of teams are:
        - smb_sales
        - enterprise_sales
    """
    def __init__(self):
        # Pick the type once and derive the id from it. Two independent
        # random draws let id and type disagree.
        team_type = random.choice(team_types)
        # 1-based like Role ids, so 0 is never used as an id.
        self.id = team_types.index(team_type) + 1
        self.type = team_type
        # NOTE(review): division is drawn per instance, so the same team id
        # can appear with different divisions.
        self.division = random.choice(locales)

In [6]:
class User(Base):
    """
    Schema for a User (employee)
    Every User carries one Role and one Team, stored both as a nested dict
    and as a flat *_id attribute
    """
    def __init__(self, id):
        self.id = id
        self.full_name = fake.name()

        role = Role().as_dict()
        self.role = role
        self.role_id = role.get('id')

        team = Team().as_dict()
        self.team = team
        self.team_id = team.get('id')

        self.is_active = random.choice([True, False])
        self.did_hit_quota = self.decide_on_quota()
        self.date_joined = self.random_iso_date()
        self.date_inactive = self.did_deactivate()

    def decide_on_quota(self):
        """
        Randomly decide did_hit_quota: only an active user whose role quota
        is not below a freshly drawn cutoff counts as having hit quota
        """
        # The cutoff is drawn before the activity check, for every user.
        cutoff = random.random() * 50000
        if not self.is_active:
            return False
        quota = self.role.get('monthly_quota')
        return not (quota < cutoff)

    def did_deactivate(self):
        """
        Return a random date on or after date_joined for inactive users,
        or None for users who are still active
        """
        if self.is_active:
            return None
        joined_on = datetime.datetime.strptime(self.date_joined, '%Y-%m-%d').date()
        return self.random_iso_date(start=joined_on)

In [7]:
def yield_user(count):
    """
    Lazily generate `count` user records, each as a dict

    Ids run from 1 to count, so 0 is never used as an id, in line with the
    1-based ids of the seeded tables.
    """
    for i in range(1, count + 1):
        yield User(i).as_dict()

def seed_user_table(count):
    """
    Build the users table as a DataFrame with exactly `count` rows

    Ids run from 1 to count inclusive; range(1, count) would stop one row
    short of the requested count.
    """
    return pd.DataFrame([User(i).as_dict() for i in range(1, count + 1)])

In [8]:
# Seed the users table (kept in memory for get_seeded_user) and write it to disk.
USERS = seed_user_table(count=25)
USERS.to_csv('db/users.csv', encoding='utf-8', index=False)

In [9]:
def get_seeded_user():
    """Return the id of one randomly chosen row of the seeded USERS table."""
    seeded_ids = USERS['id'].values
    return random.choice(seeded_ids)

In [10]:
class Account(Base):
    """
    Schema for an Account
    Accounts are parents to Opportunities and represent a company
    """
    def __init__(self, id):
        # Generate the fake values first, in the order they are stored.
        company = fake.company()
        street = fake.street_address()
        city = fake.city()
        country = fake.country()

        self.id = id
        self.name = company
        self.billing_address_short = street
        self.billing_city = city
        self.billing_country = country

In [11]:
def seed_account_table(count):
    """
    Build the accounts table as a DataFrame with exactly `count` rows

    Ids run from 1 to count inclusive; range(1, count) would stop one row
    short of the requested count.
    """
    return pd.DataFrame([Account(i).as_dict() for i in range(1, count + 1)])

In [12]:
# Seed the accounts table (kept in memory for get_seeded_account) and write it to disk.
ACCOUNTS = seed_account_table(count=10)
ACCOUNTS.to_csv('db/accounts.csv', encoding='utf-8', index=False)

In [13]:
def get_seeded_account():
    """Return the id of one randomly chosen row of the seeded ACCOUNTS table."""
    seeded_ids = ACCOUNTS['id'].values
    return random.choice(seeded_ids)

In [14]:
class Lead(Base):
    """
    Schema for a Lead
    A Lead has a generated name, references one seeded Account and is
    owned by one seeded User
    """
    def __init__(self, id):
        self.id, self.name = id, fake.name()
        # The account is drawn before the owner.
        self.account_id, self.owner_id = get_seeded_account(), get_seeded_user()

In [15]:
def seed_lead_table(count):
    """
    Build the leads table as a DataFrame with exactly `count` rows

    Ids run from 1 to count inclusive; range(1, count) would stop one row
    short of the requested count.
    """
    return pd.DataFrame([Lead(i).as_dict() for i in range(1, count + 1)])

In [16]:
# Seed the leads table (kept in memory for get_seeded_lead) and write it to disk.
LEADS = seed_lead_table(count=1000)
LEADS.to_csv('db/leads.csv', encoding='utf-8', index=False)

In [17]:
def get_seeded_lead():
    """Return the id of one randomly chosen row of the seeded LEADS table."""
    seeded_ids = LEADS['id'].values
    return random.choice(seeded_ids)

In [18]:
class Opportunity(Base):
    """
    Schema for an Opportunity, which represents a deal of a specific value and duration
    Deals can be won or lost and move through a variety of stages
    Opportunities relate to:
        - Account
        - Lead
        - User (owner)
    """
    def __init__(self, id):
        self.id = id
        self.name = fake.bs()
        self.account_id = get_seeded_account()
        self.lead_id = get_seeded_lead()
        self.owner_id = get_seeded_user()
        self.stage = self.get_random_stage()
        self.deal_value = self.get_random_deal_value()
        self.deal_length = self.get_random_deal_length()
        # Deal value spread evenly over the deal length.
        self.mrr = round(self.deal_value / self.deal_length, 2)
        self.is_closed = self.get_is_closed()
        # Closed opps get an explicit True/False outcome, so a lost deal is
        # is_won == False rather than a missing value. Open opps, whose
        # outcome is not known yet, stay None.
        self.is_won = (self.stage == 'closed_won') if self.is_closed else None
        self.is_lost = (self.stage == 'closed_lost') if self.is_closed else None
        self.date_closed = self.random_iso_date() if self.is_closed else None

    def get_random_stage(self):
        """
        Naively overseed closed opps to make the analysis more interesting
        (each closed stage is listed twice, each open stage once)
        """
        vals = ['discovery', 'qualified', 'solution_design', 'closed_won', 'closed_lost', 'closed_won', 'closed_lost']
        return random.choice(vals)

    def get_is_closed(self):
        """
        Return True when the stage is one of the two closed stages
        """
        return self.stage in ('closed_won', 'closed_lost')

    def get_random_deal_value(self):
        """
        Return a random deal value in [0, 25000], rounded to 2 decimals
        """
        return round(random.random() * 25000, 2)

    def get_random_deal_length(self):
        """
        Return a random deal length between 1 and 11 inclusive
        """
        # NOTE(review): range(1, 12) stops at 11. If lengths are months and
        # 12 should be possible, the bound needs to be 13 - confirm.
        return random.choice(range(1,12))
 

In [19]:
def seed_opp_table(count):
    """
    Build the opportunities table as a DataFrame with exactly `count` rows

    Ids run from 1 to count inclusive; range(1, count) would stop one row
    short of the requested count.
    """
    return pd.DataFrame([Opportunity(i).as_dict() for i in range(1, count + 1)])

In [20]:
def get_seeded_opp():
    """Return the id of one randomly chosen row of the seeded OPPS table."""
    seeded_ids = OPPS['id'].values
    return random.choice(seeded_ids)

In [21]:
# Seed the opportunities table (kept in memory for get_seeded_opp) and write it to disk.
OPPS = seed_opp_table(count=1000)
OPPS.to_csv('db/opps.csv', encoding='utf-8', index=False)

In [22]:
class Product(Base):
    """
    Schema for a Product
    A Product is an id plus a type label
    """
    def __init__(self, i, product_type):
        self.id, self.type = i, product_type

In [23]:
# The product catalogue is fixed: three product types, numbered from 1.
PRODUCTS = pd.DataFrame([
    Product(product_id, product_type).as_dict()
    for product_id, product_type in enumerate(
        ('database', 'prof_consulting', 'self_monitor'), start=1)
])
PRODUCTS.to_csv('db/product.csv', encoding='utf-8', index=False)

In [24]:
class Order(Base):
    """
    Schema for an Order
    An Order references one seeded Opportunity and lists the ids of the
    Products it contains
    """
    def __init__(self, id):
        self.id = id
        self.opportunity_id = get_seeded_opp()
        self.products = self.seed_products()
        self.created_at = self.random_iso_date()

    def seed_products(self):
        """
        Return the ids of the products on this order as a list

        A random size k between 1 and the highest product id is drawn and the
        products with ids 1..k are selected, so every order has at least one
        product and every product can appear.
        """
        how_many = random.randint(1, int(PRODUCTS.id.max()))
        # Product ids are 1-based. range(how_many) would test for ids
        # 0..how_many-1, which never matches the highest id and gives an
        # empty order whenever how_many == 1.
        wanted_ids = list(range(1, how_many + 1))
        return PRODUCTS[PRODUCTS['id'].isin(wanted_ids)].id.values.tolist()
        

In [25]:
# Frame of closed opportunities (the name is kept for any later cells).
cnt_closed = OPPS[OPPS['is_closed'] == True]
# Create as many orders as there are closed opps, with ids starting at 1.
# id.max() is the largest closed id, not a count, and is NaN when nothing
# is closed.
# NOTE(review): Order draws opportunity_id from all opps, open ones included.
# Restricting it to closed opps would need a change in Order/get_seeded_opp.
ORDERS = pd.DataFrame([Order(i).as_dict() for i in range(1, len(cnt_closed) + 1)])
ORDERS.to_csv('db/orders.csv', index=False, encoding='utf-8')