# Анализ накачек языка

Реализация импортируемых классов находится [здесь](https://github.com/SmEgDm/tfl/tree/main/labs/lab3/analyze_cfg).

In [1]:
import os
from functools import reduce
from graphviz import Digraph

from CFG import CFG
from Term import Term
from Node import Node
from Test import Test

## Поиск регулярных подмножеств

In [2]:
def regular_subset(cfg: CFG):
    """Return the nonterminals of ``cfg`` that only involve right-linear rules.

    A nonterminal is kept when every rule with it on the left side reports
    ``is_right_linear()`` and every nonterminal mentioned on the right side
    of those rules is itself kept.

    Args:
        cfg: grammar object providing ``get_nterms()`` and
            ``rules_with_left_side(nterm)``.

    Returns:
        A ``set`` of the qualifying nonterminals.
    """
    # Seed: nonterminals whose own rules are all right-linear.
    candidates = {
        nterm
        for nterm in cfg.get_nterms()
        if all(
            rule.is_right_linear()
            for rule in cfg.rules_with_left_side(nterm)
        )
    }

    # Prune to a fixed point: a candidate that refers to a nonterminal
    # outside the candidate set cannot stay in it.
    while True:
        offenders = {
            nterm
            for nterm in candidates
            if any(
                symbol.is_nterm() and symbol not in candidates
                for rule in cfg.rules_with_left_side(nterm)
                for symbol in rule.right_side
            )
        }
        if not offenders:
            break
        candidates -= offenders

    return candidates

## Дерево накачек

In [3]:
class PumpTree:
    """Derivation tree showing how a nonterminal derives itself again.

    The tree is built by a depth-first search over the rules of the grammar,
    starting from the nonterminal being analysed. ``root`` is ``None`` when
    the search finds no derivation that leads back to that nonterminal.
    """

    @staticmethod
    def _dfs(term, start_nterm, cfg, visited):
        """Build the subtree for ``term`` while looking for ``start_nterm``.

        Args:
            term: grammar symbol to expand.
            start_nterm: nonterminal whose recursive occurrence is searched.
            cfg: grammar providing ``rules_with_left_side(nterm)``.
            visited: set of nonterminals already entered; it is shared by
                the whole search and entries are never removed.

        Returns:
            A pair ``(node, found)``. ``node`` is ``None`` when no subtree
            could be built; ``found`` is ``True`` when the subtree contains
            the recursive occurrence of ``start_nterm``.
        """
        # Terminals are always leaves.
        if not term.is_nterm():
            return (Node(term), False)

        # A nonterminal met for the second time either closes the cycle
        # (it is the start nonterminal) or makes this branch a dead end.
        if term in visited:
            if term == start_nterm:
                return (Node(term), True)
            return (None, False)

        for rule in cfg.rules_with_left_side(term):
            node = Node(term)
            found = False
            for child_term in rule.right_side:
                if found:
                    # The recursive occurrence is already in the tree: the
                    # remaining symbols are attached unexpanded.
                    child = Node(child_term)
                else:
                    visited.add(term)
                    child, found = PumpTree._dfs(
                        child_term,
                        start_nterm,
                        cfg,
                        visited
                    )
                    # Identity test: a missing subtree means this rule
                    # cannot be used, so try the next one.
                    if child is None:
                        break
                node.add_child(child)

            if found:
                return (node, True)

        return (None, False)

    def __init__(self, pump_nterm, cfg):
        """Search ``cfg`` for a derivation of ``pump_nterm`` from itself.

        Args:
            pump_nterm: nonterminal to analyse.
            cfg: grammar providing ``rules_with_left_side(nterm)``.
        """
        # NOTE(review): presumably restarts Node's id numbering for each
        # tree - confirm against the Node implementation.
        Node.set_id(0)
        self.root, _ = PumpTree._dfs(pump_nterm, pump_nterm, cfg, set())

    def get_pumping(self):
        """Split the root's pumping sequence around the root's own term.

        Returns:
            A pair ``(before, after)``: the parts of the sequence returned
            by ``self.root.get_pumping()`` that precede and follow the first
            occurrence of ``self.root.term``.

        NOTE(review): requires ``self.root`` to be set; the sequence is
        presumably the frontier of the tree - confirm against Node.
        """
        pumping = self.root.get_pumping()
        separator_index = pumping.index(self.root.term)

        return pumping[0:separator_index], pumping[separator_index + 1:]

    def to_graphviz(self):
        """Render the tree into a new SVG-format ``Digraph`` and return it."""
        digraph = Digraph(format='svg')
        self.root.to_graphviz(digraph)

        return digraph

## Проверка $\Phi_1 \in L(\Phi_2^{+})$

In [4]:
def belongs_to_language(f1, f2, cfg):
    """Check whether ``f1`` can be matched against repetitions of ``f2``.

    ``f2`` is a sequence of grammar symbols. Terminals must match ``f1``
    literally; a leading nonterminal is replaced by the right side of each
    of its rules in turn. When a copy of ``f2`` is used up and ``f1`` is
    not, matching restarts with a fresh copy of ``f2``.

    Args:
        f1: sequence of symbols to be matched.
        f2: sequence of symbols forming the repeated pattern.
        cfg: grammar providing ``rules_with_left_side(nterm)``; the
            ``right_side`` of its rules must support ``+`` with a list.

    Returns:
        ``True`` as soon as all of ``f1`` has been consumed (even if part of
        the current copy of ``f2`` is left over), ``False`` otherwise.

    NOTE: there is no recursion guard, so a rule whose right side starts
    with its own left side keeps being expanded without consuming input.
    """
    def matches(remaining, pattern):
        remaining = list(remaining)
        pattern = list(pattern)

        # Drop the longest common prefix of both sequences.
        common = 0
        limit = min(len(remaining), len(pattern))
        while common < limit and remaining[common] == pattern[common]:
            common += 1
        remaining = remaining[common:]
        pattern = pattern[common:]

        if not remaining:
            return True
        if not pattern:
            # Current copy of f2 is exhausted: start another repetition.
            return len(f2) != 0 and matches(remaining, f2)

        head = pattern[0]
        if not head.is_nterm():
            # Mismatching terminal.
            return False

        tail = pattern[1:]
        return any(
            matches(remaining, rule.right_side + tail)
            for rule in cfg.rules_with_left_side(head)
        )

    return matches(f1, f2)

## Развертка нетерминала

In [5]:
def unfold(unfold_nterm: "Term", cfg: "CFG"):
    """Return the shortest words derivable from ``unfold_nterm``.

    Every rule of a nonterminal is expanded symbol by symbol: terminals are
    appended as they are, nonterminals are replaced by each of their own
    shortest words (one resulting word per alternative). A rule that leads
    back to a nonterminal currently being expanded is skipped, since such a
    derivation can never produce a shortest word.

    Args:
        unfold_nterm: nonterminal to expand.
        cfg: grammar providing ``rules_with_left_side(nterm)``.

    Returns:
        A list of words (each a list of terminal symbols) that all have the
        minimal length; an empty list when no word can be derived.
    """
    # Nonterminals on the current expansion path only. Unmarking them on the
    # way back lets the same nonterminal be expanded again in sibling
    # positions (e.g. both occurrences of A in S -> A A).
    active = set()

    def rec(nterm):
        if nterm in active:
            return []

        active.add(nterm)
        try:
            words = []
            for rule in cfg.rules_with_left_side(nterm):
                new_words = [[]]
                for term in rule.right_side:
                    if not term.is_nterm():
                        new_words = [word + [term] for word in new_words]
                        continue

                    # Cross product: one word per (prefix, suffix) pair.
                    suffixes = rec(term)
                    new_words = [
                        word + suffix
                        for word in new_words
                        for suffix in suffixes
                    ]
                    if not new_words:
                        # The nonterminal yields nothing: drop this rule.
                        break
                words += new_words
        finally:
            active.discard(nterm)

        if not words:
            return []

        min_length = min(len(word) for word in words)
        return [word for word in words if len(word) == min_length]

    return rec(unfold_nterm)

## Рекурсивное замыкание

In [6]:
def clousure(regular_nterms, possibly_regular_nterms, cfg):
    """Extend both classifications to nonterminals that depend only on them.

    Repeats until nothing changes: an unclassified nonterminal whose rules
    mention only nonterminals from ``regular_nterms`` becomes regular;
    otherwise, if they mention only nonterminals from either set, it becomes
    possibly regular.

    Args:
        regular_nterms: set of regular nonterminals, updated in place.
        possibly_regular_nterms: set of possibly regular nonterminals,
            updated in place.
        cfg: grammar providing ``get_nterms()`` and
            ``rules_with_left_side(nterm)``.

    Returns:
        None; the result is delivered through the two sets.
    """
    progress = True
    while progress:
        progress = False
        for candidate in cfg.get_nterms():
            already_classified = (
                candidate in regular_nterms
                or candidate in possibly_regular_nterms
            )
            if already_classified:
                continue

            # Nonterminals appearing on the right sides of the candidate.
            referenced = [
                symbol
                for rule in cfg.rules_with_left_side(candidate)
                for symbol in rule.right_side
                if symbol.is_nterm()
            ]

            if all(symbol in regular_nterms for symbol in referenced):
                regular_nterms.add(candidate)
            elif all(
                symbol in regular_nterms
                or symbol in possibly_regular_nterms
                for symbol in referenced
            ):
                possibly_regular_nterms.add(candidate)
            else:
                continue

            progress = True

## Анализатор

In [7]:
def analyze(cfg):
    """Classify the nonterminals of ``cfg`` using their pumpings.

    Nonterminals returned by ``regular_subset`` are regular from the start.
    For each other nonterminal a ``PumpTree`` is built; if it has a root,
    its pumping ``(f1, f2)`` is examined:

    * the nonterminal is suspicious when ``f2`` contains a nonterminal that
      is not regular, or when ``belongs_to_language(f1, f2, cfg)`` fails;
    * otherwise it is possibly regular when every word from
      ``unfold(nterm, cfg)`` passes ``belongs_to_language`` against ``f2``.

    Finally ``clousure`` propagates both classifications.

    Args:
        cfg: grammar providing ``get_nterms()`` and
            ``rules_with_left_side(nterm)``.

    Returns:
        A tuple ``(regular_nterms, possibly_regular_nterms,
        suspicious_nterms, trees)`` where ``trees`` holds the pump trees of
        the suspicious nonterminals.
    """
    regular_nterms = regular_subset(cfg)
    suspicious_nterms = set()
    possibly_regular_nterms = set()
    trees = []

    for nterm in cfg.get_nterms():
        if nterm in regular_nterms:
            continue

        pump_tree = PumpTree(nterm, cfg)

        # Identity test, so the outcome cannot depend on how the node type
        # implements equality. No root means no pumping was found.
        if pump_tree.root is None:
            continue

        f1, f2 = pump_tree.get_pumping()

        f2_contains_irregular_nterms = any(
            term.is_nterm() and term not in regular_nterms
            for term in f2
        )
        if (
            f2_contains_irregular_nterms or
            not belongs_to_language(f1, f2, cfg)
        ):
            suspicious_nterms.add(nterm)
            trees.append(pump_tree)
            continue

        if all(
            belongs_to_language(word, f2, cfg)
            for word in unfold(nterm, cfg)
        ):
            possibly_regular_nterms.add(nterm)

    clousure(regular_nterms, possibly_regular_nterms, cfg)

    return (
        regular_nterms,
        possibly_regular_nterms,
        suspicious_nterms,
        trees
    )

## Тесты

In [8]:
from Test import TESTS

TESTS_DIR = 'tests'

# os.listdir returns entries in arbitrary order; sorting makes the tests run
# in the same order on every machine and every run.
for test_name in sorted(os.listdir(TESTS_DIR)):
    # The test is named after the part of the file name before the first dot.
    Test(
        test_name.split('.')[0],
        os.path.join(TESTS_DIR, test_name)
    ).execute(analyze)

Test.display_results()

Tab(children=(HTML(value='\n            <div>\n                <p>\n                    <b>КС-грамматика</b><b…