In [4]:
import json
import os
os.makedirs("data/json_files", exist_ok=True)

In [3]:
company_data = {
    "company_name": "TechNova Solutions",
    "employees": [
        {
            "employee_id": 101,
            "name": "Alice Johnson",
            "role": "Software Engineer",
            "skills": ["Python", "Django", "SQL"],
            "projects": [
                {"project_id": "P001", "name": "Inventory System", "status": "Completed"},
                {"project_id": "P002", "name": "E-commerce Platform", "status": "Ongoing"}
            ]
        },
        {
            "employee_id": 102,
            "name": "Bob Smith",
            "role": "Frontend Developer",
            "skills": ["React", "JavaScript", "CSS"],
            "projects": [
                {"project_id": "P003", "name": "Customer Portal", "status": "Ongoing"}
            ]
        },
        {
            "employee_id": 103,
            "name": "Charlie Davis",
            "role": "Data Scientist",
            "skills": ["Python", "Pandas", "Machine Learning"],
            "projects": [
                {"project_id": "P004", "name": "Sales Forecasting", "status": "Completed"},
                {"project_id": "P005", "name": "AI Chatbot", "status": "Ongoing"}
            ]
        },
        {
            "employee_id": 104,
            "name": "Diana Lee",
            "role": "UI/UX Designer",
            "skills": ["Figma", "Adobe XD", "Illustrator"],
            "projects": [
                {"project_id": "P006", "name": "Mobile App Design", "status": "Completed"}
            ]
        },
        {
            "employee_id": 105,
            "name": "Ethan Brown",
            "role": "DevOps Engineer",
            "skills": ["AWS", "Docker", "Kubernetes"],
            "projects": [
                {"project_id": "P007", "name": "Cloud Migration", "status": "Ongoing"}
            ]
        }
    ],
    "departments": {
        "Engineering": {
            "head": "Michael Scott",
            "budget": 500000,
            "team_size": 25
        },
        "Human Resources": {
            "head": "Rachel Green",
            "budget": 150000,
            "team_size": 8
        }
    }
}

In [5]:
with open("data/json_files/company_data.json", "w", encoding="utf-8") as f:
    json.dump(company_data, f, indent=4)

In [8]:
event_data = [
    {"timestamp": "2025-08-17T09:00:00Z", "event": "user_login", "user_id": 101},
    {"timestamp": "2025-08-17T09:05:00Z", "event": "file_upload", "user_id": 102},
    {"timestamp": "2025-08-17T09:10:00Z", "event": "user_logout", "user_id": 101},
    {"timestamp": "2025-08-17T09:20:00Z", "event": "password_change", "user_id": 103},
    {"timestamp": "2025-08-17T09:30:00Z", "event": "user_login", "user_id": 104},
]

with open("data/json_files/event_data.jsonl", "w", encoding="utf-8") as f:
    for event in event_data:
        f.write(json.dumps(event) + "\n")

#json processing


In [16]:
from langchain_community.document_loaders import JSONLoader
import json

json_loader = JSONLoader( 
                         file_path="data/json_files/company_data.json",
                         jq_schema=".employees[]",
                         text_content=False
                         )


json_doc = json_loader.load()
print(json_doc[1].page_content)


{"employee_id": 102, "name": "Bob Smith", "role": "Frontend Developer", "skills": ["React", "JavaScript", "CSS"], "projects": [{"project_id": "P003", "name": "Customer Portal", "status": "Ongoing"}]}


In [18]:
import json
from typing import List
from langchain_core.documents import Document


def parse_json_intelligently(file_path: str) -> List[Document]:
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    documents = []
    for employee in data.get("employees", []):
        content = f"""
        Employee Profile:
        Name: {employee.get("name")}
        Role: {employee.get("role")}
        Skills: {", ".join(employee.get("skills", []))}
        
        Projects:
        """

        for project in employee.get("projects", []):
            content += f"""
            - Project ID: {project.get("project_id")}
              Name: {project.get("name")}
              Status: {project.get("status")}
            """

        doc = Document(
            page_content=content.strip(),
            metadata={
                "employee_id": employee.get("employee_id"),
                "name": employee.get("name"),
                "role": employee.get("role"),
                "skills": employee.get("skills", []),
                "num_projects": len(employee.get("projects", [])),
                "source": file_path
            }
        )

        documents.append(doc)

    return documents


In [21]:
print(parse_json_intelligently("data/json_files/company_data.json")[0])

page_content='Employee Profile:
        Name: Alice Johnson
        Role: Software Engineer
        Skills: Python, Django, SQL

        Projects:
        
            - Project ID: P001
              Name: Inventory System
              Status: Completed
            
            - Project ID: P002
              Name: E-commerce Platform
              Status: Ongoing' metadata={'employee_id': 101, 'name': 'Alice Johnson', 'role': 'Software Engineer', 'skills': ['Python', 'Django', 'SQL'], 'num_projects': 2, 'source': 'data/json_files/company_data.json'}
