# In [2]:
import json
from typing import Dict
from dataclasses import dataclass, field, asdict

@dataclass
class Lemma:
    """Aggregated statistics for a single lemma.

    Attributes:
        lemma: The lemma (dictionary form) itself.
        pos: Part-of-speech tag recorded for the lemma.
        inflections: Morphological feature string; empty by default.
        frequency: Total number of occurrences counted for the lemma.
        wordform_freqs: Occurrence count for each surface form of the lemma.
    """

    # Required identifying fields.
    lemma: str
    pos: str

    # Optional fields; every instance gets its own fresh dict below.
    inflections: str = ""
    frequency: int = 0
    wordform_freqs: Dict[str, int] = field(default_factory=dict)

def read_data(data):
    """Aggregate per-lemma statistics from parsed sentences.

    Args:
        data: Mapping with a "sentences" list. Each sentence has a "tokens"
            list, and each token provides "lemma", "text" and "pos".
            A token's "feats" entry is optional.

    Returns:
        Dict mapping each lemma string to its ``Lemma`` record, in order of
        first appearance.

    Raises:
        KeyError: If "sentences", a sentence's "tokens", or a token's
            "lemma", "text" or "pos" entry is missing.
    """
    lemmas = {}
    for sentence in data["sentences"]:
        for token in sentence["tokens"]:
            lemma = token["lemma"]
            wordform = token["text"]
            pos = token["pos"]
            # Tokens without morphological features fall back to the same
            # empty default that Lemma.inflections declares.
            inflections = token.get("feats", "")

            entry = lemmas.get(lemma)
            if entry is None:
                # NOTE: pos and inflections are taken from the first token
                # seen for a lemma; later tokens only update the counters.
                entry = lemmas[lemma] = Lemma(lemma, pos, inflections)

            entry.frequency += 1
            entry.wordform_freqs[wordform] = entry.wordform_freqs.get(wordform, 0) + 1

    return lemmas

def main(
    input_path="C:/Users/ACER/Downloads/sample_parsed_sentences.json",
    output_path="C:/Users/ACER/Downloads/output.json",
):
    """Read parsed sentences from JSON and write per-lemma statistics as JSON.

    Args:
        input_path: Path of the UTF-8 JSON file to read. Defaults to the
            original hard-coded sample location.
        output_path: Path of the UTF-8 JSON file to write. Defaults to the
            original hard-coded output location.

    The output is a JSON list with one object per lemma (the dataclass
    fields), written with non-ASCII characters unescaped and a 4-space indent.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    lemmas = read_data(data)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(
            [asdict(lemma) for lemma in lemmas.values()],
            f,
            ensure_ascii=False,
            indent=4,
        )

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
