In [20]:
import pandas as pd
from pathlib import Path

# Everything this notebook reads or generates lives under ./import.
data_path = Path("import")
readme_path = data_path / "README.md"

# Source table; later cells select the word and relationship columns from it.
frame = pd.read_parquet(data_path / "etymology.parquet")

# Start the README from scratch ("w" truncates) so the following cells can
# append their own sections. The encoding is pinned so the generated file does
# not depend on the platform default.
# The clean-up query uses DETACH DELETE: a plain DELETE is rejected for any
# node that still has relationships, which is the case after a previous import.
with readme_path.open(mode="w", encoding="utf-8") as out:
    out.write(
        """
## Preparation

```cypherl
// Clear data
MATCH (n)
DETACH DELETE n;
```

Then

```cypherl
STORAGE MODE IN_MEMORY_ANALYTICAL;
```
"""
    )

In [21]:
def write_lines(file_name: str, data: pd.Series, encoding: str = "utf-16"):
    """Write every item of ``data`` to ``file_name``, one item per line.

    Args:
        file_name: Path of the file to create; an existing file is overwritten.
        data: Iterable of lines, typically a ``pd.Series`` of strings. Note
            that iterating a ``pd.DataFrame`` yields its column labels, not
            its rows, so pass a Series (or any other iterable of lines).
        encoding: Text encoding of the output file. Defaults to ``"utf-16"``
            to keep the historical behaviour; pass ``"utf-8"`` to match the
            CSV exports written elsewhere in this notebook.
    """
    with open(file_name, mode="w", encoding=encoding) as file:
        # f-string formatting keeps string items unchanged and also accepts
        # non-string items instead of failing on ``item + "\n"``.
        file.writelines(f"{line}\n" for line in data)

In [22]:
# Words
def _cypher_string(value) -> str:
    """Render ``value`` as a double-quoted, escaped Cypher string literal."""
    escaped = (
        str(value)
        .replace("\\", "\\\\")  # must run first so later escapes are not doubled
        .replace('"', '\\"')
        .replace("\n", "\\n")  # keep each statement on a single line
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


def word_cypher(row: pd.Series):
    """Build the Cypher ``CREATE`` statement for one word.

    Args:
        row: Mapping-like row (e.g. the Series passed by
            ``DataFrame.apply(..., axis="columns")``) providing the keys
            ``term_id``, ``lang`` and ``term``.

    Returns:
        A single-line ``CREATE (w:Word {...});`` statement. Backslashes,
        double quotes and line breaks in the values are escaped so they cannot
        terminate the string literal or split the statement.
    """
    return (
        "CREATE (w:Word {"
        f" id: {_cypher_string(row['term_id'])},"
        f" lang: {_cypher_string(row['lang'])},"
        f" term: {_cypher_string(row['term'])}"
        " });"
    )


# One row per distinct word: keep only the three identifying columns, discard
# rows with a missing value in any of them, then collapse exact duplicates.
word_columns = ["term_id", "term", "lang"]
word_data = frame[word_columns].dropna().drop_duplicates()

# Both exports share the same layout: pipe-delimited UTF-8 without the index.
csv_options = dict(sep="|", encoding="utf-8", index=False)

# The first 10,000 rows go to a small sample file; the full table follows.
word_data.iloc[:10000].to_csv("./import/sample_words.csv", **csv_options)
word_data.to_csv("./import/words.csv", **csv_options)

# README section explaining how to load the exported words and index them.
words_section = """
Import words

```
LOAD CSV FROM "/import/words.csv" WITH HEADER DELIMITER "|" AS line
CREATE (w:Word { id: line["term_id"], lang: line["lang"], term: line["term"] })
RETURN COUNT(w) AS `Inserted`;
```

Create indexes
```
CREATE INDEX ON :Word(id);
CREATE INDEX ON :Word(lang);
CREATE INDEX ON :Word(term);
```
"""

with readme_path.open(mode="a") as out:
    out.write(words_section)

In [24]:
# Relationships
# Source column -> short column name used in the exported edge tables.
mapper = {"term_id": "from", "reltype": "rel", "related_term_id": "to"}

# Complete, de-duplicated (from, rel, to) triples.
complete_edges = frame[list(mapper)].dropna().drop_duplicates()
relations = complete_edges.rename(columns=mapper)

with readme_path.open(mode="a") as out:
    # Open a single fenced block that collects one LOAD CSV query per type.
    out.write(
        """
Import relationships

```
"""
    )
    # Each relationship type gets its own CSV of (from, to) pairs and a
    # matching import query in the README.
    for rel_name, rel_rows in relations.groupby("rel"):
        csv_name = f"{rel_name}.csv"
        # Hyphens are swapped for underscores in the relationship type only;
        # the CSV file keeps the original name.
        rel_type = rel_name.replace("-", "_")
        rel_rows[["from", "to"]].to_csv(
            f"./import/{csv_name}", sep="|", encoding="utf-8", index=False
        )
        out.write(
            f"""
LOAD CSV FROM "/import/{csv_name}" WITH HEADER DELIMITER "|" AS line
MATCH (a:Word {{ id: line["from"] }}), (b:Word {{ id: line["to"] }})
CREATE (a)-[r:{rel_type}]->(b);
                  """
        )
    # Close the fenced block opened before the loop.
    out.write("\n```")