# Load handbook

In [11]:
from pathlib import Path

file = Path("../data/general/employee_handbook.md")

# Read the whole handbook as UTF-8; bytes that cannot be decoded are dropped
# (errors="ignore") instead of raising.
with file.open(encoding="utf-8", errors="ignore") as handbook_file:
    handbook = handbook_file.read()

# Preview only the first 2000 characters to keep the cell output short.
print(handbook[:2000])

# Employee Handbook

## Table of Contents
1. [Welcome & Introduction](#welcome--introduction)
2. [Employee Onboarding & Benefits](#employee-onboarding--benefits)
3. [Leave Policies](#leave-policies)
4. [Work Hours & Attendance](#work-hours--attendance)
5. [Code of Conduct & Workplace Behavior](#code-of-conduct--workplace-behavior)
6. [Health & Safety](#health--safety)
7. [Compensation & Payroll](#compensation--payroll)
8. [Reimbursement Policies](#reimbursement-policies)
9. [Training & Development](#training--development)
10. [Performance & Feedback](#performance--feedback)
11. [Privacy & Data Security](#privacy--data-security)
12. [Exit Policy](#exit-policy)
13. [FAQs](#faqs)
14. [Miscellaneous](#miscellaneous)

---

## Welcome & Introduction

### Company Vision and Mission
At FinSolve Technologies, our vision is to empower businesses and individuals through innovative technology solutions. Our mission is to deliver high-quality, sustainable products and services that create value for

# File Cleaning

## Remove table of contents

In [12]:
# Drop the "## Table of Contents" block from the handbook, keeping every other
# line (including the "## " heading or "---" rule that ends the block).
lines = handbook.splitlines()
cleaned_handbook = []

# True while the loop is positioned inside the table-of-contents block.
table_of_contents = False

for line in lines:
    stripped = line.strip()
    opens_toc = stripped.lower().startswith("## table of contents")
    closes_toc = stripped.startswith("## ") or stripped == "---"

    if opens_toc:
        # The TOC heading itself is discarded.
        table_of_contents = True
    elif not table_of_contents:
        cleaned_handbook.append(line)
    elif closes_toc:
        # First heading or horizontal rule after the TOC: leave the block and
        # keep this line.
        table_of_contents = False
        cleaned_handbook.append(line)

print(cleaned_handbook[:20])

['# Employee Handbook', '', '---', '', '## Welcome & Introduction', '', '### Company Vision and Mission', 'At FinSolve Technologies, our vision is to empower businesses and individuals through innovative technology solutions. Our mission is to deliver high-quality, sustainable products and services that create value for our stakeholders.', '', '### Core Values', '- **Integrity**: We act with honesty and transparency.', '- **Respect**: We value diversity and treat everyone with dignity.', '- **Innovation**: We encourage creativity and continuous improvement.', '- **Customer Focus**: Our customers are at the heart of everything we do.', '- **Accountability**: We take responsibility for our actions and results.', '', '### Company Overview', 'Founded in 2016, FinSolve Technologies is a leading player in fintech with a presence across India and global markets. We are committed to ethical business, social responsibility, and fostering a culture of learning and growth.', '', '---']


In [13]:
# Show the first 20 cleaned lines one per row, which is easier to read than
# the list repr.
preview_lines = cleaned_handbook[:20]
for preview_line in preview_lines:
    print(preview_line)

# Employee Handbook

---

## Welcome & Introduction

### Company Vision and Mission
At FinSolve Technologies, our vision is to empower businesses and individuals through innovative technology solutions. Our mission is to deliver high-quality, sustainable products and services that create value for our stakeholders.

### Core Values
- **Integrity**: We act with honesty and transparency.
- **Respect**: We value diversity and treat everyone with dignity.
- **Innovation**: We encourage creativity and continuous improvement.
- **Customer Focus**: Our customers are at the heart of everything we do.
- **Accountability**: We take responsibility for our actions and results.

### Company Overview
Founded in 2016, FinSolve Technologies is a leading player in fintech with a presence across India and global markets. We are committed to ethical business, social responsibility, and fostering a culture of learning and growth.

---


# List metadata

In [14]:
# Print the heading outline of the cleaned handbook: "## " titles are listed
# as sections and "### " titles as subsections.
for raw_line in cleaned_handbook:
    heading = raw_line.strip()

    if heading.startswith("### "):
        current_subsection = heading[4:].strip()
        print(f" Subsection: {current_subsection}\n")
    elif heading.startswith("## "):
        current_section = heading[3:].strip()
        print(f"\nSection: {current_section}\n")



Section: Welcome & Introduction

 Subsection: Company Vision and Mission

 Subsection: Core Values

 Subsection: Company Overview


Section: Employee Onboarding & Benefits

 Subsection: Onboarding Process

 Subsection: Employee Benefits


Section: Leave Policies

 Subsection: Types of Leave

 Subsection: Leave Application Process

 Subsection: Public Holidays Policy


Section: Work Hours & Attendance

 Subsection: Work Hours

 Subsection: Attendance & Punctuality

 Subsection: Time Tracking

 Subsection: Overtime & Compensation


Section: Code of Conduct & Workplace Behavior

 Subsection: Professionalism & Respect

 Subsection: Anti-Discrimination & Equal Opportunity

 Subsection: Dress Code

 Subsection: Substance Abuse Policy

 Subsection: Harassment & Bullying Prevention

 Subsection: Conflict Resolution Process


Section: Health & Safety

 Subsection: Workplace Safety Guidelines

 Subsection: Accident & Injury Reporting

 Subsection: Mental Health & Well-being Support

 Subsection

In [15]:
# `cleaned_handbook` is already built from `handbook` by the table-of-contents
# removal cell earlier in this notebook. This cell used to repeat that loop
# verbatim, which produced the exact same list; the copy is dropped so the
# cleaning logic lives in one place. Only the preview is kept.
print(cleaned_handbook[:20])

['# Employee Handbook', '', '---', '', '## Welcome & Introduction', '', '### Company Vision and Mission', 'At FinSolve Technologies, our vision is to empower businesses and individuals through innovative technology solutions. Our mission is to deliver high-quality, sustainable products and services that create value for our stakeholders.', '', '### Core Values', '- **Integrity**: We act with honesty and transparency.', '- **Respect**: We value diversity and treat everyone with dignity.', '- **Innovation**: We encourage creativity and continuous improvement.', '- **Customer Focus**: Our customers are at the heart of everything we do.', '- **Accountability**: We take responsibility for our actions and results.', '', '### Company Overview', 'Founded in 2016, FinSolve Technologies is a leading player in fintech with a presence across India and global markets. We are committed to ethical business, social responsibility, and fostering a culture of learning and growth.', '', '---']


In [16]:
def get_section(line):
    """Return the title of a ``## `` heading line, or ``None`` otherwise.

    Surrounding whitespace is ignored, both around the line and around the
    title that follows the ``## `` marker.
    """
    text = line.strip()
    return text[3:].strip() if text.startswith("## ") else None

def get_subsection(line):
    """Return the title of a ``### `` heading line, or ``None`` otherwise.

    Surrounding whitespace is ignored, both around the line and around the
    title that follows the ``### `` marker.
    """
    text = line.strip()
    if not text.startswith("### "):
        return None
    title = text[4:]
    return title.strip()

def get_subsection_content(cleaned_handbook):
    """Group handbook lines into one record per ``### `` subsection.

    Args:
        cleaned_handbook: iterable of text lines. ``## `` lines are read as
            section headings and ``### `` lines as subsection headings (via
            ``get_section`` / ``get_subsection``).

    Returns:
        A list of dicts with the keys ``"section"``, ``"subsection"`` and
        ``"content"``, in document order. ``"content"`` is the list of raw
        lines found between the subsection heading and the next heading;
        blank lines and rules such as ``---`` are kept as they appear.

    Notes:
        * Lines that sit directly under a section heading, before its first
          subsection, are not collected.
        * A subsection that appears before any section heading is discarded.
          Its lines are dropped rather than carried over into the next
          subsection's content.
        * A subsection with no lines at all produces no record.
    """
    current_section = None
    current_subsection = None
    collecting = False
    content = []
    results = []

    def flush():
        """Emit the buffered lines as a record, then always clear the buffer."""
        nonlocal content

        if collecting and content and current_section is not None:
            results.append({
                "section": current_section,
                "subsection": current_subsection,
                "content": content,
            })
        # Reset unconditionally: when the record is skipped (for example no
        # section seen yet) the stale lines must not leak into the next chunk.
        content = []

    for line in cleaned_handbook:
        new_section = get_section(line)
        if new_section is not None:
            flush()
            # Stop collecting until the next subsection heading shows up.
            collecting = False
            current_section = new_section
            current_subsection = None
            continue

        new_subsection = get_subsection(line)
        if new_subsection is not None:
            flush()
            current_subsection = new_subsection
            collecting = True
            continue

        if collecting:
            content.append(line)

    # Emit whatever is still buffered for the last subsection.
    flush()

    return results


In [17]:
# Build one record per subsection (section title, subsection title and the
# lines that belong to it) from the cleaned handbook lines.
chunked_book = get_subsection_content(cleaned_handbook=cleaned_handbook)

In [18]:
# Save the chunked handbook as a JSON file.
import json

output_file = Path("../data/general/chunked_reports/employee_handbook.json")

# No other cell shown in this notebook creates the target folder, and
# write_text() raises FileNotFoundError when it is missing, so create it first.
output_file.parent.mkdir(parents=True, exist_ok=True)

output_file.write_text(json.dumps(chunked_book, indent=2), encoding="utf-8", errors="ignore")
print(f"Saved chunked employee handbook to {output_file}")

Saved chunked employee handbook to ..\data\general\employee_handbook.json
