In [1]:
# Load the local `prompts` module and reload it right away so that edits made
# to it are picked up without restarting the kernel.
import importlib
prompts = importlib.import_module('prompts')
importlib.reload(prompts)

# Client used for the chat-completion request further down. No credentials are
# passed here — presumably the API key is read from the environment; TODO confirm.
from openai import OpenAI
client = OpenAI()

from IPython.display import display, Markdown

# Generated document texts; the generation loop appends one entry per document.
documents = []

# Message list handed to the chat-completion call as `messages`.
chat_history = []

In [20]:
import tiktoken

# Tokenizer matching the model used for generation.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Print how many tokens each document description encodes to, one line per
# prompt entry.
for prompt_dict in prompts.employee_contract_prompts_list:
    description_tokens = encoding.encode(prompt_dict['doc_description'])
    num_tokens = len(description_tokens)
    print(f"{prompt_dict['doc_title']}: {num_tokens} tokens")

Employment Contract: 69 tokens
Employee Stock Ownership Plan (ESOP): 80 tokens
Employee Handbook: 82 tokens
Employee Payslip: 72 tokens


In [36]:
# Generate a document template for each selected prompt entry, save it as a
# Markdown file named after the document title, and collect the text in
# `documents`.
for prompt_dict in prompts.employee_contract_prompts_list:
	# Only the "Employee Handbook" entry is processed; every other title is skipped.
	if prompt_dict['doc_title'] != "Employee Handbook":
		continue

	# The user prompt is only part of the conversation for the duration of the
	# request.
	chat_history.append(
		{
			"role": "user",
			"content": prompts.base_prompt.format_map(prompt_dict)
		})
	try:
		response = client.chat.completions.create(
			model="gpt-3.5-turbo",
			messages=chat_history,
			temperature=1,
			max_tokens=750,
			top_p=1,
			frequency_penalty=0,
			presence_penalty=0
			)
	finally:
		# Remove the prompt even when the request fails, so re-running the cell
		# does not send a stale duplicate prompt.
		chat_history.pop()

	generated_markdown = response.choices[0].message.content

	# Keep the generated text (not the prompt) in the history for later requests.
	# NOTE(review): the model's output is stored under the "system" role rather
	# than "assistant" — confirm this is intentional.
	chat_history.append(
		{
			"role": "system",
			"content": generated_markdown
		})

	# Explicit UTF-8: the platform default encoding may be unable to represent
	# non-ASCII characters in the generated text.
	with open(f"../data/hr_docs_templates/{prompt_dict['doc_title']}.md", "w", encoding="utf-8") as f:
		f.write(generated_markdown)

	documents.append(generated_markdown)

In [37]:
# Show the raw text of every document collected so far.
documents

["# Employee Handbook\n\n## Company Name: Instagram\n\n### Table of Contents\n1. Introduction\n2. Mission and Values\n3. Code of Conduct\n4. Anti-Discrimination and Harassment Policies\n5. Leave and Attendance Policies\n6. Performance Management Processes\n7. Employee Benefits\n8. Contact Information\n\n## 1. Introduction\nWelcome to Instagram! This Employee Handbook is designed to provide you with important information about our company policies, procedures, benefits, and expectations. Please take the time to read through this handbook carefully and refer back to it whenever you have questions about your rights and responsibilities as an employee.\n\n## 2. Mission and Values\nAt Instagram, our mission is to provide a platform where users can connect, create, and be inspired. We value creativity, diversity, and inclusion in everything we do. It is important that all employees align with our mission and values to create a positive and inclusive work environment.\n\n## 3. Code of Conduct

In [24]:
import re

def extract_variables_from_markdown(markdown_text):
    """Collect the placeholder names written as ``{name}`` in a text.

    Args:
        markdown_text: Text to scan for curly-brace placeholders.

    Returns:
        A list holding the text found between each ``{`` and the next ``}``,
        in order of appearance. A placeholder that occurs several times is
        listed several times, and an empty pair ``{}`` yields an empty string.
    """
    placeholder_re = re.compile(r'\{([^}]*)\}')
    return [match.group(1) for match in placeholder_re.finditer(markdown_text)]

# Placeholder names found in each generated document, selected by position in
# `documents`.
# NOTE(review): the variable names assume documents[0], [1] and [2] hold the
# contract, ESOP and payslip texts in that order. That depends on which titles
# the generation loop appended, and in what order, during this kernel session —
# confirm before relying on it (fewer than three entries raises IndexError).
variables_employee_contract = extract_variables_from_markdown(documents[0])
variables_employee_esop = extract_variables_from_markdown(documents[1])
variables_employee_payslip = extract_variables_from_markdown(documents[2])

In [38]:
import pandas as pd

# Side-by-side table of the unique placeholder names found in each document.
# The index is reset after de-duplication so every column is packed from row 0.
# Without it the surviving values keep their original positions as labels, and
# because the DataFrame aligns its columns on those labels, every dropped
# duplicate would leave a NaN gap in the middle of a column.
df = pd.DataFrame({
    "employee_contract": pd.Series(variables_employee_contract).drop_duplicates().reset_index(drop=True),
    "employee_esop": pd.Series(variables_employee_esop).drop_duplicates().reset_index(drop=True),
    "employee_payslip": pd.Series(variables_employee_payslip).drop_duplicates().reset_index(drop=True)
})

df

Unnamed: 0,employee_contract,employee_esop,employee_payslip
0,employee_name,employee_name,employee_name
1,employee_position,employee_position,employee_id
2,employee_department,employee_department,employee_position
3,employee_responsibilities,esop_allocation,employee_department
4,employee_compensation,esop_vesting,pay_period_start_date
5,employee_benefits,esop_criteria,pay_period_end_date
6,employee_work_hours,esop_valuation,earnings_basic_salary
7,employee_termination_conditions,esop_rights_responsibilities,earnings_allowances
8,employee_confidentiality_agreement,employer_name,earnings_bonuses
9,employee_ip_rights,,earnings_total


In [2]:
# display(Markdown(prompts.employee_contract_template.format_map(prompts.employee_contract_list[0])))

# Fill the employment-contract template once per employee record and write each
# result to its own Markdown file. The file name is derived only from
# `employee_name`, so two records with the same name write to the same file.
for employee_dict in prompts.employee_contract_list:
    employee_contract_filled = prompts.employee_contract_template.format_map(employee_dict)
    # Explicit UTF-8: the platform default encoding may be unable to represent
    # non-ASCII characters in the filled text.
    with open(f"../data/hr_docs_filled/employee_contracts/employee_contract_{employee_dict['employee_name']}.md", "w", encoding="utf-8") as f:
        f.write(employee_contract_filled)

In [3]:
# Fill the ESOP template once per employee record and write each result to its
# own Markdown file. The file name is derived only from `employee_name`, so two
# records with the same name write to the same file.
for employee_dict in prompts.employee_esop_list:
    employee_esop_filled = prompts.employee_esop_template.format_map(employee_dict)
    # Explicit UTF-8: the platform default encoding may be unable to represent
    # non-ASCII characters in the filled text.
    with open(f"../data/hr_docs_filled/employee_esops/employee_esop_{employee_dict['employee_name']}.md", "w", encoding="utf-8") as f:
        f.write(employee_esop_filled)

In [6]:
# Fill the payslip template once per employee record and write each result to
# its own Markdown file. The file name is derived only from `employee_name`, so
# two records with the same name write to the same file.
for employee_dict in prompts.employee_payslip_list:
    employee_payslip_filled = prompts.employee_payslip_template.format_map(employee_dict)
    # Explicit UTF-8: the platform default encoding may be unable to represent
    # non-ASCII characters in the filled text.
    with open(f"../data/hr_docs_filled/employee_payslips/employee_payslip_{employee_dict['employee_name']}.md", "w", encoding="utf-8") as f:
        f.write(employee_payslip_filled)