Hochfrequenz · hf-krechan · Sep 23, 2022 · Sep 23, 2022 · Sep 23, 2022 · Sep 23, 2022
diff --git a/ahbextractor/helper/write_functions.py b/ahbextractor/helper/write_functions.py
@@ -14,26 +14,26 @@
 
 
 def parse_paragraph_in_edifact_struktur_column_to_dataframe(
-    paragraph: Paragraph,
+    paragraphs: List[Paragraph],
     dataframe: pd.DataFrame,
     row_index: int,
     edifact_struktur_cell_left_indent_position: int,
 ):
     """Parses a paragraph in the edifact struktur column and puts the information into the appropriate columns
 
     Args:
-        paragraph (Paragraph): Current paragraph in the edifact struktur cell
+        paragraphs (List[Paragraph]): Current paragraphs in the edifact struktur cell
         dataframe (pd.DataFrame): Contains all infos
         row_index (int): Current index of the DataFrame
         edifact_struktur_cell_left_indent_position (int): Position of the left indent from the indicator edifact
             struktur cell
     """
-
-    splitted_text_at_tabs = paragraph.text.split("\t")
-    tab_count = paragraph.text.count("\t")
+    joined_text = " ".join(p.text for p in paragraphs)
+    splitted_text_at_tabs = joined_text.split("\t")
+    tab_count = joined_text.count("\t")
 
     # Check if the line starts on the far left
-    if paragraph.paragraph_format.left_indent != edifact_struktur_cell_left_indent_position:
+    if paragraphs[0].paragraph_format.left_indent != edifact_struktur_cell_left_indent_position:
 
         if tab_count == 2:
             dataframe.at[row_index, "Segment Gruppe"] = splitted_text_at_tabs[0]
@@ -42,13 +42,14 @@ def parse_paragraph_in_edifact_struktur_column_to_dataframe(
         elif tab_count == 1:
             dataframe.at[row_index, "Segment Gruppe"] = splitted_text_at_tabs[0]
             dataframe.at[row_index, "Segment"] = splitted_text_at_tabs[1]
-        elif tab_count == 0 and not paragraph.text == "":
-            if paragraph.runs[0].bold:
+        elif tab_count == 0 and joined_text.strip() != "":
+            if paragraphs[0].runs[0].bold:
                 # Segmentgruppe: SG8
                 dataframe.at[row_index, "Segment Gruppe"] = splitted_text_at_tabs[0]
             else:
                 # Segmentname: Referenzen auf die ID der\nTranche
-                if dataframe.at[row_index, "Segment Gruppe"] == "":
+                _sg_text = dataframe.at[row_index, "Segment Gruppe"]
+                if _sg_text == "":
                     # Referenzen auf die ID der
                     dataframe.at[row_index, "Segment Gruppe"] = splitted_text_at_tabs[0]
                 else:
@@ -155,7 +156,7 @@ def write_segment_name_to_dataframe(
     # EDIFACT STRUKTUR COLUMN
     for paragraph in edifact_struktur_cell.paragraphs:
         parse_paragraph_in_edifact_struktur_column_to_dataframe(
-            paragraph=paragraph,
+            paragraphs=[paragraph],
             dataframe=elixir.soul,
             row_index=elixir.current_df_row_index,
             edifact_struktur_cell_left_indent_position=elixir.edifact_struktur_left_indent_position,
@@ -195,7 +196,8 @@ def write_segmentgruppe_to_dataframe(
 
     # EDIFACT STRUKTUR COLUMN
     parse_paragraph_in_edifact_struktur_column_to_dataframe(
-        paragraph=edifact_struktur_cell.paragraphs[0],
+        # there might be 2 paragraphs in case of multi line headings, so we're handing over all the paragraphs
+        paragraphs=edifact_struktur_cell.paragraphs,
         dataframe=elixir.soul,
         row_index=elixir.current_df_row_index,
         edifact_struktur_cell_left_indent_position=elixir.edifact_struktur_left_indent_position,
@@ -235,7 +237,7 @@ def write_segment_to_dataframe(
 
     # EDIFACT STRUKTUR COLUMN
     parse_paragraph_in_edifact_struktur_column_to_dataframe(
-        paragraph=edifact_struktur_cell.paragraphs[0],
+        paragraphs=edifact_struktur_cell.paragraphs,
         dataframe=elixir.soul,
         row_index=elixir.current_df_row_index,
         edifact_struktur_cell_left_indent_position=elixir.edifact_struktur_left_indent_position,
@@ -318,7 +320,7 @@ def write_dataelement_to_dataframe(
 
     # EDIFACT STRUKTUR COLUMN
     parse_paragraph_in_edifact_struktur_column_to_dataframe(
-        paragraph=edifact_struktur_cell.paragraphs[0],
+        paragraphs=edifact_struktur_cell.paragraphs,
         dataframe=elixir.soul,
         row_index=elixir.current_df_row_index,
         edifact_struktur_cell_left_indent_position=elixir.edifact_struktur_left_indent_position,
@@ -362,7 +364,7 @@ def write_dataelement_to_dataframe(
 
             if edifact_struktur_cell.paragraphs[0].text != "":
                 parse_paragraph_in_edifact_struktur_column_to_dataframe(
-                    paragraph=edifact_struktur_cell.paragraphs[0],
+                    paragraphs=edifact_struktur_cell.paragraphs,
                     dataframe=elixir.soul,
                     row_index=elixir.current_df_row_index,
                     edifact_struktur_cell_left_indent_position=elixir.edifact_struktur_left_indent_position,

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,3 +8,8 @@ profile = "black"
 [tool.pylint."MESSAGES CONTROL"]
 max-line-length = 120
 good-names=["i", "j","k", "ex", "Run", "_", "df"]
+
+[tool.pytest.ini_options]
+pythonpath = [
+  "."
+]
diff --git a/unittests/test_write_functions.py b/unittests/test_write_functions.py
@@ -325,10 +325,10 @@ def test_parse_paragraph_in_edifact_struktur_column_to_dataframe(
 
         # insert text
         self.test_cell.text = text_content
-        test_paragraph = self.test_cell.paragraphs[0]
+        test_paragraph = [self.test_cell.paragraphs[0]]
 
         # set left indent positon
-        test_paragraph.paragraph_format.left_indent = left_indent_position
+        test_paragraph[0].paragraph_format.left_indent = left_indent_position
 
         # Initial two dataframes ...
         df = pd.DataFrame(columns=expected_df_row.keys(), dtype="str")
@@ -340,7 +340,7 @@ def test_parse_paragraph_in_edifact_struktur_column_to_dataframe(
         expected_df.loc[row_index] = initial_dataframe_row
 
         parse_paragraph_in_edifact_struktur_column_to_dataframe(
-            paragraph=test_paragraph,
+            paragraphs=test_paragraph,
             dataframe=df,
             row_index=row_index,
             edifact_struktur_cell_left_indent_position=self.edifact_struktur_cell_left_indent_position_of_indicator_paragraph,