In [None]:
import networkx as nx

In [None]:
# Step 1: Upload the CoNLL-U file
from IPython.display import display
from ipywidgets import FileUpload

# Create a single-file upload widget. The accept filter lists both the
# `.conll` and `.conllu` extensions so that files named either way can be
# selected in the browser's file dialog (previously only `.conll` matched,
# although the notebook asks for CoNLL-U files).
upload = FileUpload(accept='.conll,.conllu', multiple=False)

# Render the widget in the cell output
display(upload)


FileUpload(value={}, accept='.conll', description='Upload')

In [None]:
# Report a missing upload first; otherwise keep the first uploaded file's content.
if not upload.value:
    print("No file uploaded. Please upload a CoNLL-U format file.")
else:
    # Index upload.value with the first key it yields and take that entry's
    # 'content' field (left undecoded in this cell).
    uploaded_file_content = upload.value[next(iter(upload.value))]['content']
    # Placeholder: code that processes the uploaded content goes here.


In [None]:
!pip install python-docx


Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: python-docx
Successfully installed python-docx-1.1.2


In [None]:
import re

def remove_punctuation(text):
    """Return ``text`` without characters that are neither word characters nor whitespace.

    Word characters follow the regex ``\\w`` class, so letters, digits and
    the underscore are kept.
    """
    non_word_or_space = re.compile(r'[^\w\s]')
    return non_word_or_space.sub('', text)


In [None]:
def extract_nodes_and_edges(conllu_content):
    """Parse tab-separated dependency rows into per-sentence node and edge lists.

    Sentences are runs of non-blank lines separated by one or more blank (or
    whitespace-only) lines. Lines starting with ``#`` are skipped as comments.
    Every other row is split on tabs and read positionally: column 0 is the
    token id, column 1 the word form, column 3 the POS tag and column 5 the
    head id.
    NOTE(review): standard CoNLL-U stores HEAD at column index 6; index 5 is
    kept here unchanged -- confirm it matches the files being uploaded.

    Punctuation is stripped from each word with ``remove_punctuation``; tokens
    whose word becomes empty are dropped together with their outgoing edge.
    Ids are kept exactly as they appear in the file (as strings), so edges may
    still reference the id of a dropped token.

    Args:
        conllu_content: The decoded file content as a single string. ``\\n``,
            ``\\r\\n`` and ``\\r`` line endings are all accepted.

    Returns:
        A pair ``(all_nodes, all_edges)`` of equally long lists with one entry
        per sentence. Each ``all_nodes`` entry is a list of
        ``(node_id, cleaned_word, pos, sent_id)`` tuples (``sent_id`` counts
        sentences from 1); each ``all_edges`` entry is a list of
        ``(head_id, node_id)`` tuples for tokens whose head is not ``'0'``.
        Empty or whitespace-only input yields ``([], [])``.

    Raises:
        ValueError: If a non-comment row has fewer than six tab-separated
            columns.
    """
    all_nodes = []
    all_edges = []

    # Normalise line endings first so that CRLF files still contain the
    # blank lines that separate sentences.
    text = conllu_content.replace('\r\n', '\n').replace('\r', '\n')

    # Group consecutive non-blank lines into sentence blocks. Extra blank
    # lines between sentences (or an empty file) simply produce no block
    # instead of an empty "sentence" that cannot be parsed.
    sentence_blocks = []
    current_block = []
    for line in text.split('\n'):
        if line.strip():
            current_block.append(line)
        elif current_block:
            sentence_blocks.append(current_block)
            current_block = []
    if current_block:
        sentence_blocks.append(current_block)

    for sent_id, lines in enumerate(sentence_blocks, start=1):
        nodes = []
        edges = []

        for line in lines:
            if line.lstrip().startswith('#'):
                continue

            parts = line.split('\t')
            if len(parts) < 6:
                raise ValueError(
                    f"Sentence {sent_id}: expected at least 6 tab-separated "
                    f"columns but found {len(parts)} in row {line!r}"
                )

            # Strip stray surrounding whitespace from the id fields so they
            # compare equal to the ids used by other rows.
            node_id = parts[0].strip()
            word = parts[1]
            pos = parts[3]
            head = parts[5].strip()

            cleaned_word = remove_punctuation(word)

            if cleaned_word:  # tokens that were pure punctuation are dropped
                nodes.append((node_id, cleaned_word, pos, sent_id))
                if head != '0':  # '0' marks the root, which has no incoming edge
                    edges.append((head, node_id))

        all_nodes.append(nodes)
        all_edges.append(edges)

    return all_nodes, all_edges




In [None]:
# Decode the first uploaded file as UTF-8 and split it on blank lines
uploaded_file_content = upload.value[next(iter(upload.value))]['content'].decode('utf-8')
sentences = uploaded_file_content.strip().split('\n\n')

# Print each chunk followed by several blank lines as a visual separator
for sentence in sentences:
    print(f"{sentence}\n\n\n\n")

1	Holly	Holly	PROPN	NNP	3	compound	0	5	2
2	Bluff	Bluff	PROPN	NNP	1	flat	7	12	1
3	Church	Church	PROPN	NNP	4	nsubj	14	20	1
4	wishes	wish	VERB	VBZ	0	root	22	28	4
5	to	to	PART	TO	6	mark	30	32	1
6	know	know	VERB	VB	4	xcomp	34	38	2
7	if	if	SCONJ	IN	10	mark	40	42	3
8	any	any	DET	DT	9	det	44	47	1
9	church	church	NOUN	NN	10	nsubj	48	54	1
10	has	have	VERB	VBZ	6	ccomp	56	59	4
11	church	church	NOUN	NN	12	compound	61	67	1
12	furniture	furniture	NOUN	NN	10	obj	69	78	2
13	it	it	PRON	PRP	15	nsubj	80	82	2
14	is	be	AUX	VBZ	15	aux	84	86	1
15	taking	take	VERB	VBG	12	acl:relcl	88	94	3
16	out	out	ADP	RP	15	compound:prt	96	99	1
17	to	to	PART	TO	18	mark	101	103	1
18	give	give	VERB	VB	15	advcl	104	108	3
19	place	place	NOUN	NN	18	obj	110	115	1
20	to	to	ADP	IN	22	case	117	119	2
21	new	new	ADJ	JJ	22	amod	121	124	1
22	furniture	furniture	NOUN	NN	18	obl	126	135	4
23	.	.	PUNCT	.	4	punct	135	136	19




1	If	if	SCONJ	IN	3	mark	138	140	2
2	you	you	PRON	PRP	3	nsubj	142	145	1
3	have	have	AUX	VBP	4	aux	147	151	1
4	used	us

In [None]:
def generate_latex_dependency_trees(all_nodes, all_edges):
    r"""Render sentences as ``dependency`` environments (tikz-dependency style).

    For every sentence one ``dependency`` block is produced: a ``deptext`` row
    containing the words separated by ``\&`` followed by one ``\depedge`` line
    per edge, with an empty edge label.

    ``\depedge`` arguments are emitted as 1-based column positions of the
    words inside ``deptext``, not as the original token ids. The two differ as
    soon as a token (e.g. punctuation) has been dropped from ``nodes``, so ids
    are remapped here. Edges whose head or dependent is not present in
    ``nodes`` are skipped because they have no column to attach to.

    Args:
        all_nodes: One list per sentence of ``(node_id, word, pos, sent_id)``
            tuples, in the order the words should appear.
        all_edges: One list per sentence of ``(head_id, dependent_id)`` tuples
            using the same ids as ``all_nodes``. Paired with ``all_nodes`` via
            ``zip``, so surplus entries on either side are ignored.

    Returns:
        The LaTeX source as a single string; an empty string when there are
        no sentences.
    """
    rendered_sentences = []

    for nodes, edges in zip(all_nodes, all_edges):
        # Map each surviving token id to its 1-based column in deptext.
        # Keys are compared as strings so int and str ids both work.
        column_of = {
            str(node_id): column
            for column, (node_id, _, _, _) in enumerate(nodes, start=1)
        }

        words = " \\& ".join(word for _, word, _, _ in nodes)
        lines = [
            "\\begin{dependency}[theme = simple]",
            "\\begin{deptext}[column sep=1em]",
            f"{words} \\\\",
            "\\end{deptext}",
        ]

        for head, dependent in edges:
            head_column = column_of.get(str(head))
            dependent_column = column_of.get(str(dependent))
            if head_column is None or dependent_column is None:
                # One endpoint was removed from the sentence; no column exists.
                continue
            lines.append(f"\\depedge{{{head_column}}}{{{dependent_column}}}{{}}")

        lines.append("\\end{dependency}")
        rendered_sentences.append("\n".join(lines) + "\n\n")

    return "".join(rendered_sentences)


# Requires a file to have been uploaded through the widget: take the first
# uploaded entry and decode its content as UTF-8 text.
uploaded_file_content = (
    upload.value[next(iter(upload.value))]['content'].decode('utf-8')
)

# Parse the text into per-sentence nodes and edges
all_nodes, all_edges = extract_nodes_and_edges(uploaded_file_content)

# Turn the parsed sentences into LaTeX dependency-tree source
latex_code = generate_latex_dependency_trees(all_nodes, all_edges)

# Show the generated LaTeX
print(latex_code)


\begin{dependency}[theme = simple]
\begin{deptext}[column sep=1em]
Holly \& Bluff \& Church \& wishes \& to \& know \& if \& any \& church \& has \& church \& furniture \& it \& is \& taking \& out \& to \& give \& place \& to \& new \& furniture \\
\end{deptext}
\depedge{3}{1}{}
\depedge{1}{2}{}
\depedge{4}{3}{}
\depedge{6}{5}{}
\depedge{4}{6}{}
\depedge{10}{7}{}
\depedge{9}{8}{}
\depedge{10}{9}{}
\depedge{6}{10}{}
\depedge{12}{11}{}
\depedge{10}{12}{}
\depedge{15}{13}{}
\depedge{15}{14}{}
\depedge{12}{15}{}
\depedge{15}{16}{}
\depedge{18}{17}{}
\depedge{15}{18}{}
\depedge{18}{19}{}
\depedge{22}{20}{}
\depedge{22}{21}{}
\depedge{18}{22}{}
\end{dependency}

\begin{dependency}[theme = simple]
\begin{deptext}[column sep=1em]
If \& you \& have \& used \& church \& furniture \& to \& dispose \& of \& write \& F \& B \& Hart \& Holly \& Bluff \\
\end{deptext}
\depedge{3}{1}{}
\depedge{3}{2}{}
\depedge{4}{3}{}
\depedge{6}{5}{}
\depedge{4}{6}{}
\depedge{8}{7}{}
\depedge{4}{8}{}
\depedge{8}{9}