In [65]:
import pandas as pd
import numpy as np
import json
import re
import os

In [66]:
# Root folder of the project data; every other path in the notebook is built from it.
DATA_FOLDER = '../data'
# Folder with the C# model sources that are scanned for documented members.
MODELS_FOLDER = DATA_FOLDER + '/DocProperties/incore-exporter/Workflow.DTO/Models'

In [67]:
# Load the spreadsheet with the document-property snippets and their descriptions.
docproperties_xlsx_path = f'{DATA_FOLDER}/DocProperties/DocProperties_filled.xlsx'
df = pd.read_excel(docproperties_xlsx_path)

In [68]:
# Show the loaded table as the cell output (last expression of the cell).
df

Unnamed: 0,Name,Result,Description
0,750InsPremAutocivilka,"__Result = ""---"";\nWorkflow.DTO.Models.Program...",Код знаходить програму з кодом типу страхуванн...
1,750InsPremAsist,"__Result = ""---"";\nWorkflow.DTO.Models.Program...",Код обчислює загальну суму запланованих платеж...
2,704LandAddress,"__Result = """";\nWorkflow.DTO.Models.Program pr...",Код формує текстову адресу земельної ділянки з...
3,739ParamSport,"__Result = ""Ні"";\nWorkflow.DTO.Models.Program ...","Код перевіряє, чи в програмі GetProgramMed вст..."
4,769Rate249,"__Result = ""---"";\nWorkflow.DTO.Models.Program...",Код перевіряє кількість об'єктів страхування в...
...,...,...,...
946,MedPaymentSumEURO,Workflow.DTO.Models.Program prog = GetProgramM...,
947,MedInsuranceSumDollar,Workflow.DTO.Models.Program prog = GetProgramM...,
948,MedInsuranceSumEURO,Workflow.DTO.Models.Program prog = GetProgramM...,
949,BranchNameFromParameter,Workflow.DTO.Models.ProgramParameterValue para...,


In [69]:
# Keep every column except 'Name' and store that reduced table as CSV.
df_small = df.drop(columns=['Name'])
small_csv_path = f'{DATA_FOLDER}/DocProperties_small.csv'
df_small.to_csv(small_csv_path, index=False)

In [70]:
# Build one chat sample per row: the description is the user prompt and the
# C# snippet is the assistant answer.
#
# Rows with a missing Description or Result are skipped: formatting NaN into
# the f-strings would otherwise emit samples such as "Пояснення: nan".
complete_rows = df_small.dropna(subset=['Description', 'Result'])

result = [
    {
        "messages": [
            {
                "role": "user",
                "content": f"Пояснення: {description}"
            },
            {
                "role": "assistant",
                "content": f"C# code:\n{snippet}"
            },
        ]
    }
    for description, snippet in zip(complete_rows['Description'], complete_rows['Result'])
]

In [71]:
# Write all chat samples as one pretty-printed JSON document (UTF-8, non-ASCII kept as is).
chat_dataset_path = f'{DATA_FOLDER}/DocProperties_JSONL.json'
with open(chat_dataset_path, 'w', encoding='utf-8') as out_file:
    json.dump(result, out_file, ensure_ascii=False, indent=2, allow_nan=True)

In [72]:
def parse_documentation_members(csharp_code):
    """Collect C# members marked with ``[DocumentationMember]`` and their summaries.

    The source is scanned line by line. Every ``/// <summary>`` comment that is
    followed (after optional blank lines, further ``///`` lines and other
    attribute lines) by a ``[DocumentationMember]`` attribute contributes one
    entry for the member declared after that attribute. The declaration itself
    is parsed by ``extract_member_info``.

    Args:
        csharp_code: Full text of a C# source file.

    Returns:
        dict mapping a class name to a list of
        ``{"name": ..., "type": ..., "description": ...}`` dicts in source
        order. Members found before any ``class`` declaration are stored under
        ``"Unknown"``; a class without documented members maps to ``[]``.
    """
    results = {}
    current_class = None
    lines = csharp_code.split('\n')

    i = 0
    while i < len(lines):
        line = lines[i].strip()

        # Comments are skipped here: prose such as "Base class for ..." must
        # not be mistaken for a class declaration.
        if not line.startswith('//'):
            class_match = re.search(r'\bclass\s+(\w+)', line)
            if class_match:
                current_class = class_match.group(1)
                results.setdefault(current_class, [])

        summary = _read_summary(lines, i)
        if summary is not None:
            # Continue after the last line of the summary comment.
            description, i = summary
            member_info = _find_documented_member(lines, i + 1)
            if member_info:
                results.setdefault(current_class or "Unknown", []).append({
                    "name": member_info["name"],
                    "type": member_info["type"],
                    "description": description
                })

        i += 1

    return results


def _read_summary(lines, start):
    """Return ``(text, last_line_index)`` of the ``<summary>`` comment opening at ``lines[start]``, else None."""
    # Only a <summary> opening starts a description; other doc tags
    # (<remarks>, <param>, ...) are ignored instead of triggering a scan for
    # the next </summary> somewhere further down the file.
    opening = re.match(r'///\s*<summary>(.*)', lines[start].strip())
    if opening is None:
        return None

    parts = []
    end = start
    fragment = opening.group(1)
    while '</summary>' not in fragment:
        parts.append(fragment)
        following = lines[end + 1].strip() if end + 1 < len(lines) else None
        if following is None or not following.startswith('///'):
            # Unterminated summary: stop at the end of the comment block
            # rather than swallowing the code that follows it.
            break
        end += 1
        fragment = following[3:]
    else:
        parts.append(fragment.split('</summary>', 1)[0])

    text = ' '.join(piece.strip() for piece in parts if piece.strip())
    return text, end


def _find_documented_member(lines, start):
    """Return the parsed member that follows a ``[DocumentationMember]`` marker at or after ``start``, else None."""
    marker = '[DocumentationMember]'

    marker_index = None
    for index in range(start, len(lines)):
        candidate = lines[index].strip()
        if marker in candidate:
            marker_index = index
            break
        # Blank lines, further doc-comment lines and other attributes may sit
        # between the summary and the marker; any other line means the
        # summarised element is not a documented member.
        if candidate and not candidate.startswith(('///', '[')):
            return None
    if marker_index is None:
        return None

    # The declaration may share the line with the marker.
    same_line = lines[marker_index].strip().split(marker, 1)[1].strip()
    if same_line:
        member_info = extract_member_info(same_line)
        if member_info:
            return member_info

    for index in range(marker_index + 1, len(lines)):
        member_line = lines[index].strip()
        if member_line.startswith('///'):
            # The next doc comment belongs to another member; do not attach
            # this description to that member's declaration.
            return None
        if not member_line:
            continue
        member_info = extract_member_info(member_line)
        if member_info:
            return member_info

    return None

def extract_member_info(line):
    """Extract the name and type of a C# member from one declaration line.

    The line is matched against ``<type> <name>`` followed by, in this order:

    * ``{``  - property,
    * ``;``  - field,
    * ``(``  - method (type is reported as ``"Method -> <return type>"``),
    * ``=``  - initialised field or expression-bodied (``=>``) property.

    The type may carry a generic argument list (``List<int>``) and nullable or
    array suffixes (``int?``, ``string[]``).

    Args:
        line: A single line of C# source.

    Returns:
        ``{"name": ..., "type": ...}`` for the first shape that matches, or
        ``None`` when the line does not look like a member declaration.
    """
    # Modifiers carry no type information. `new` is removed as well so that an
    # initialiser such as `= new Face()` is not read as a method `Face`
    # returning `new`.
    line = re.sub(
        r'\b(public|private|protected|internal|static|readonly|virtual|override|async|new)\b',
        '',
        line,
    ).strip()

    type_pattern = r'\w+(?:<.*?>)?(?:\?|\[[,\s]*\])*'
    declaration_head = r'\b(' + type_pattern + r')\s+(\w+)\s*'

    # (terminator after the member name, format of the reported type).
    # The order matters: the `=` fallback must come after the method check so
    # that default parameter values (`int x = 5`) are not taken for fields.
    shapes = (
        (r'\{', '{}'),
        (r';', '{}'),
        (r'\(', 'Method -> {}'),
        (r'=(?!=)', '{}'),
    )

    for terminator, type_format in shapes:
        match = re.search(declaration_head + terminator, line)
        if match:
            return {
                "name": match.group(2),
                "type": type_format.format(match.group(1))
            }

    return None


In [73]:
# Try the parser on a single model file and print the result as JSON.
animal_source_path = f'{MODELS_FOLDER}/Animal.cs'
with open(animal_source_path, 'r', encoding='utf-8') as source_file:
    code = source_file.read()

results = parse_documentation_members(code)
print(json.dumps(results, ensure_ascii=False, indent=2))

{
  "Animal": [
    {
      "name": "Name",
      "type": "String",
      "description": "Ім'я тварини"
    },
    {
      "name": "Age",
      "type": "String",
      "description": "Вік тварини"
    },
    {
      "name": "SuitAndSigns",
      "type": "String",
      "description": "Прикмети та ознаки тварини"
    },
    {
      "name": "Owner",
      "type": "Face",
      "description": "Глобальний унікальний ідентифікатор власника"
    },
    {
      "name": "PostAddressObject",
      "type": "PostAddress",
      "description": "Поштова адреса об'єкта, пов'язаного з твариною"
    },
    {
      "name": "OwnerName",
      "type": "string",
      "description": "Ім'я власника тварини"
    }
  ]
}


In [74]:
# Parse every C# model file and collect its documented members.
models = []
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# exported JSON stable between runs.
for file_name in sorted(os.listdir(MODELS_FOLDER)):
    file_path = os.path.join(MODELS_FOLDER, file_name)
    # Only regular *.cs files are sources; sub-folders or stray files would
    # make open()/decoding fail.
    if not (file_name.endswith('.cs') and os.path.isfile(file_path)):
        continue

    with open(file_path, 'r', encoding='utf-8') as f:
        code = f.read()

    models.append(
        {
            # splitext strips only the trailing extension, not every ".cs"
            # occurrence inside the name.
            "class": os.path.splitext(file_name)[0],
            'members': parse_documentation_members(code)
        })

In [75]:
# Store the collected member documentation as one pretty-printed JSON document.
models_doc_path = f'{DATA_FOLDER}/Models_doc.json'
with open(models_doc_path, 'w', encoding='utf-8') as out_file:
    json.dump(models, out_file, ensure_ascii=False, indent=2, allow_nan=True)