In [36]:
import pandas as pd
import numpy as np
import json
import re
from typing import List

In [14]:
# Root folder holding every input/output artefact used by this notebook.
DATA_FOLDER = '../data'
# C# model sources live below the data root, so derive the path from it
# instead of repeating the '../data' literal.
MODELS_FOLDER = f'{DATA_FOLDER}/DocProperties/incore-exporter/Workflow.DTO/Models'

In [15]:
# Load the spreadsheet with the columns shown below: Name, Result, Description.
docprops_xlsx = f'{DATA_FOLDER}/DocProperties/DocProperties_filled.xlsx'
df = pd.read_excel(docprops_xlsx)

In [16]:
# Preview the loaded frame; the rich display truncates the middle rows.
df

Unnamed: 0,Name,Result,Description
0,750InsPremAutocivilka,"__Result = ""---"";\nWorkflow.DTO.Models.Program...",Код знаходить програму з кодом типу страхуванн...
1,750InsPremAsist,"__Result = ""---"";\nWorkflow.DTO.Models.Program...",Код обчислює загальну суму запланованих платеж...
2,704LandAddress,"__Result = """";\nWorkflow.DTO.Models.Program pr...",Код формує текстову адресу земельної ділянки з...
3,739ParamSport,"__Result = ""Ні"";\nWorkflow.DTO.Models.Program ...","Код перевіряє, чи в програмі GetProgramMed вст..."
4,769Rate249,"__Result = ""---"";\nWorkflow.DTO.Models.Program...",Код перевіряє кількість об'єктів страхування в...
...,...,...,...
946,MedPaymentSumEURO,Workflow.DTO.Models.Program prog = GetProgramM...,
947,MedInsuranceSumDollar,Workflow.DTO.Models.Program prog = GetProgramM...,
948,MedInsuranceSumEURO,Workflow.DTO.Models.Program prog = GetProgramM...,
949,BranchNameFromParameter,Workflow.DTO.Models.ProgramParameterValue para...,


In [17]:
# Keep every column except 'Name' and persist the reduced table as CSV
# (the DataFrame index itself is not written).
df_small = df.drop(columns=['Name'])
df_small.to_csv(f'{DATA_FOLDER}/DocProperties_small.csv', index=False)

In [18]:
# Build one chat record per (description, code) pair.
# Empty spreadsheet cells are loaded as NaN, and an f-string would render them
# as the literal text "nan" (e.g. "Пояснення: nan"), producing junk records.
# Rows missing either the description or the code are therefore skipped.
df_pairs = df_small.dropna(subset=['Description', 'Result'])

result = [
    {
        "messages": [
            {
                "role": "user",
                "content": f"Пояснення: {description}"
            },
            {
                "role": "assistant",
                "content": f"C# code:\n{csharp}"
            },
        ]
    }
    for description, csharp in zip(df_pairs['Description'], df_pairs['Result'])
]

In [19]:
# NOTE: despite the "_JSONL" file name, the records are written as a single
# indented JSON array, not as one JSON object per line.
chat_json_path = f'{DATA_FOLDER}/DocProperties_JSONL.json'
with open(chat_json_path, 'w', encoding='utf-8') as f:
    json.dump(result, f, ensure_ascii=False, indent=4, allow_nan=True)

In [51]:
def extract_member_name(line):
    """Extract the declared member name from a single line of C# code.

    The name is taken to be the identifier that immediately precedes the
    first ``(``, ``{``, ``;`` or ``=`` on the line, after modifiers and
    leading attributes have been removed. This covers:

    * properties        -- ``Type Name { get; set; }``
    * fields            -- ``Type Name;`` and ``Type Name = value;``
    * methods           -- ``ReturnType Name(...)``, also with an inline body
    * expression bodies -- ``Type Name => expr;``

    The type may be generic (``List<string>``), nullable (``int?``) or an
    array (``string[]``).

    Args:
        line: One line of C# source.

    Returns:
        The member name, or ``None`` when the line does not look like a
        ``Type Name <terminator>`` declaration (for example a bare attribute
        line or a constructor, which has no separate type token).
    """
    # Remove access modifiers and common keywords
    line = re.sub(r'\b(public|private|protected|internal|static|readonly|virtual|override|async)\b', '', line)

    # Drop attributes written in front of the declaration, e.g.
    # "[DocumentationMember] string Name { get; set; }". Their arguments may
    # contain parentheses or '=' that would otherwise be taken for the
    # declaration's terminator.
    line = re.sub(r'^\s*(?:\[[^\]]*\]\s*)+', '', line).strip()

    # The type part may not contain a terminator character, so the captured
    # identifier is always the one in front of the FIRST terminator. This
    # keeps statements inside an inline body ("{ return x; }") from being
    # mistaken for a field declaration.
    declaration = re.match(r'[^(){};=]+?\s+(\w+)\s*[({;=]', line)
    if declaration:
        return declaration.group(1)

    return None


def _read_summary(lines, start):
    """Collect the text of the ``<summary>`` element that opens on ``lines[start]``.

    Returns a ``(description, last_index)`` tuple, where ``last_index`` is the
    index of the last line that belongs to the summary.
    """
    parts = []
    idx = start
    # Keep only what follows the opening tag on the first line.
    text = lines[idx].split('<summary>', 1)[1]
    while True:
        if '</summary>' in text:
            parts.append(text.split('</summary>', 1)[0].strip())
            break
        parts.append(text.strip())
        following = idx + 1
        # An unterminated summary ends together with its comment block, so
        # that code lines are never swallowed into the description.
        if following >= len(lines) or not lines[following].strip().startswith('///'):
            break
        idx = following
        text = lines[idx].strip()[3:]  # drop the leading '///'
    return ' '.join(part for part in parts if part), idx


def _find_documented_member(lines, start):
    """Return the name of the ``[DocumentationMember]`` member declared after a summary.

    Scanning starts at ``lines[start]``. ``None`` is returned when the
    documented declaration does not carry the marker attribute or when no
    member name can be extracted.
    """
    marker_idx = None
    for j in range(start, len(lines)):
        candidate = lines[j].strip()
        if candidate.startswith('///'):
            # Other XML doc tags (<remarks>, <param>, ...) are skipped, but a
            # new <summary> means a different member's documentation begins.
            if '<summary>' in candidate:
                return None
            continue
        if not candidate:
            continue
        if '[DocumentationMember]' in candidate:
            marker_idx = j
            break
        # Attribute order is irrelevant in C#, so other standalone attribute
        # lines may precede the marker.
        if candidate.startswith('[') and candidate.endswith(']'):
            continue
        # Reached a declaration without the marker: not a documentation member.
        return None
    if marker_idx is None:
        return None

    # The declaration may share the marker's line or follow on a later line.
    for k in range(marker_idx, len(lines)):
        member_line = lines[k].strip()
        if member_line.startswith('///'):
            # Next doc comment reached: do not borrow another member's name.
            return None
        if not member_line:
            continue
        member_name = extract_member_name(member_line)
        if member_name:
            return member_name
    return None


def parse_documentation_members(csharp_code):
    """Extract documented members marked with ``[DocumentationMember]`` from C# source.

    For every ``/// <summary>...</summary>`` XML documentation comment (single
    or multi-line) whose declaration carries the ``[DocumentationMember]``
    attribute, the member name and the whitespace-joined summary text are
    collected.

    Args:
        csharp_code: Full text of a C# source file.

    Returns:
        A list of ``{"name": ..., "description": ...}`` dicts in source order.
    """
    results = []
    lines = csharp_code.split('\n')

    i = 0
    while i < len(lines):
        line = lines[i].strip()

        # Only a line that opens a <summary> starts a documentation block;
        # other '///' lines must not be mistaken for one.
        if line.startswith('///') and '<summary>' in line:
            description, i = _read_summary(lines, i)
            member_name = _find_documented_member(lines, i + 1)
            if member_name:
                results.append({
                    "name": member_name,
                    "description": description
                })

        i += 1

    return results


In [52]:
# Read the Account model source, then parse and print outside the file context.
account_source_path = f'{MODELS_FOLDER}/Account.cs'
with open(account_source_path, 'r', encoding='utf-8') as f:
    code = f.read()

results = parse_documentation_members(code)
print(json.dumps(results, ensure_ascii=False, indent=2))

[
  {
    "name": "AccountNumber",
    "description": "Унікальний ідентифікаційний номер банківського рахунку, що призначений для ідентифікації рахунку клієнта в банківській системі"
  },
  {
    "name": "BankMFO",
    "description": "Міжнародний фінансовий код банку (МФО), який використовується для ідентифікації банку, що обслуговує рахунок"
  },
  {
    "name": "BankName",
    "description": "Назва банківської установи, в якій відкрито рахунок клієнта"
  }
]
