# Constructing a Graph Database from the Data
This notebook takes the enriched JSON files returned by the 01a Helper Script and processes them. As a result, several CSV Files are returned that can be used to construct a graph database in neo4j (see bootom of the notebook for more info).

In [1]:
import os
import hashlib
import pandas as pd

In [2]:
# Some Control Variables
PWC_PROCESSED_JSON_PATH = "data/pwc_processed_json/"
NEO4J_PATH = "data/neo4j/"
NEO4J_DB_NAME = "neo4j"
REPLACE_DB = True

if not os.path.exists(NEO4J_PATH):
    os.makedirs(NEO4J_PATH)
if not os.path.exists(PWC_PROCESSED_JSON_PATH):
    os.makedirs(PWC_PROCESSED_JSON_PATH)

In [3]:
# First compute everything for the papers_df, because it is too large to fit in memory with the other dataframes...
papers_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'papers_processed.json', dtype={"id": str}, encoding="utf-8")
paper_keywords = [keyword[0] for row in papers_df["abstract_keywords"] if row is not None for keyword in row if keyword]
# Append all keywords from the title_keywords column
paper_keywords += [keyword[0] for row in papers_df["title_keywords"] if row is not None for keyword in row if keyword]

# Create edges between papers and keywords
print("Creating edges between papers and abstract keywords...")
papers_abstract_keywords_df = (papers_df[['id', 'abstract_keywords']]
                                .rename(columns={'abstract_keywords': 'keywords'})
                                .explode('keywords')
                                .dropna()
                                .assign(keyword=lambda df: df['keywords'].str[0], score=lambda df: df['keywords'].str[1])
                                .drop(columns=['keywords'])
                                .assign(keyword_id=lambda df: ("keyword/" + df['keyword']).apply(lambda x: str(int(hashlib.md5(x.encode('utf-8')).hexdigest(), 16))))
                                .drop(columns=['keyword'])
                                .rename(columns={'id': 'paper_id'})
                                .reindex(columns=['paper_id', 'keyword_id', 'score'])
                                .reset_index(drop=True))
print(f"[EDGES] Got {len(papers_abstract_keywords_df)} rows in papers_abstract_keywords_df")

print("Creating edges between papers and title keywords...")
papers_title_keywords_df = (papers_df[['id', 'title_keywords']]
                                .rename(columns={'title_keywords': 'keywords'})
                                .explode('keywords')
                                .dropna()
                                .assign(keyword=lambda df: df['keywords'].str[0], score=lambda df: df['keywords'].str[1])
                                .drop(columns=['keywords'])
                                .assign(keyword_id=lambda df: ("keyword/" + df['keyword']).apply(lambda x: str(int(hashlib.md5(x.encode('utf-8')).hexdigest(), 16))))
                                .drop(columns=['keyword'])
                                .rename(columns={'id': 'paper_id'})
                                .reindex(columns=['paper_id', 'keyword_id', 'score'])
                                .reset_index(drop=True))
print(f"[EDGES] Got {len(papers_title_keywords_df)} rows in papers_title_keywords_df")

# papers_df
papers_df = papers_df.rename(columns={"id": "id:ID"})
papers_df = papers_df[["id:ID"] + [col for col in papers_df.columns if col != "id:ID"]]
papers_df[':LABEL'] = "Paper"

print("Saving papers_df...")
papers_df.to_csv(NEO4J_PATH + "papers.csv", index=False)

# Delete papers_df to free memory
del papers_df
print("Done and deleted papers_df")

Creating edges between papers and abstract keywords...
[EDGES] Got 3045277 rows in papers_abstract_keywords_df
Creating edges between papers and title keywords...
[EDGES] Got 1284495 rows in papers_title_keywords_df
Saving papers_df...
Done and deleted papers_df


In [4]:
# If new embeddings, keywords and OpenAlex Stuff are created, load them into the dataframes
# papers_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'papers_processed.json', dtype={"id": str}, encoding="utf-8")
methods_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'methods_processed.json', dtype={"id": str}, encoding="utf-8")
tasks_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'tasks_processed.json', dtype={"area_id_md5": str, "id_md5": str}, encoding="utf-8")
areas_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'areas_processed.json', dtype={"id_md5": str}, encoding="utf-8")
datasets_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'datasets_processed.json', dtype={"id": str}, encoding="utf-8")
repos_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'repos.json', dtype={"id": str}, encoding="utf-8")

datasets_tasks_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'datasets_tasks.json', dtype={"dataset_id": str, "task_id": str, "task_id_md5": str}, encoding="utf-8")
papers_tasks_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'papers_tasks.json', dtype={"paper_id": str, "task_id": str, "task_id_md5": str}, encoding="utf-8")
papers_methods_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'papers_methods.json', dtype={"paper_id": str, "method_id": str, "method_id_md5": str}, encoding="utf-8")
papers_repos_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'papers_repos.json', dtype={"paper_id": str, "repo_id": str}, encoding="utf-8")
tasks_areas_df = pd.read_json(PWC_PROCESSED_JSON_PATH + 'tasks_areas.json', dtype={"id": str, "area": str}, encoding="utf-8")

  return values.astype(dtype, copy=copy)


In [5]:
# Grab all keywords from all dataframes and put them into a list. Remember that the keyword column in each dataframe is a list of dicts with the keyword and the score. I only need the keyword here.
print("Grabbing all keywords from all dataframes...")
method_keywords = [keyword[0] for row in methods_df["description_keywords"] if row is not None for keyword in row if keyword]
dataset_keywords = [keyword[0] for row in datasets_df["description_keywords"] if row is not None for keyword in row if keyword]
task_keywords = [keyword[0] for row in tasks_df["description_keywords"] if row is not None for keyword in row if keyword]

# Merge the lists and remove duplicates
all_keywords = list(paper_keywords + method_keywords + dataset_keywords + task_keywords)
print(f"Got {len(all_keywords)} keywords from all dataframes!")
unique_keywords = list(set(all_keywords))
print(f"Got {len(unique_keywords)} unique keywords from all dataframes (Around {(len(unique_keywords) / len(all_keywords))*100}%).")

# Create a df with two columns: id and keyword. Assign each keyword a unique id the same way as the other nodes
keywords_df = pd.DataFrame(unique_keywords, columns=["keyword"])
keywords_df['id'] = ("keyword/" + keywords_df['keyword']).apply(lambda x: str(int(hashlib.md5(x.encode('utf-8')).hexdigest(), 16)))
keywords_df = keywords_df.reset_index(drop=True)
# Put the id column in front
keywords_df = keywords_df[['id', 'keyword']]
print(f"[NODES] Got {len(keywords_df)} rows in keywords_df")

# Create edges between methods and keywords
print("Creating edges between methods and keywords...")
methods_keywords_df = (methods_df[['id', 'description_keywords']]
                      .rename(columns={'description_keywords': 'keywords'})
                      .explode('keywords')
                      .dropna()
                      .assign(keyword=lambda df: df['keywords'].str[0], score=lambda df: df['keywords'].str[1])
                      .drop(columns=['keywords'])
                      .assign(keyword_id=lambda df: ("keyword/" + df['keyword']).apply(lambda x: str(int(hashlib.md5(x.encode('utf-8')).hexdigest(), 16))))
                      .drop(columns=['keyword'])
                      .rename(columns={'id': 'method_id'})
                      .reindex(columns=['method_id', 'keyword_id', 'score'])
                      .reset_index(drop=True))
print(f"[EDGES] Got {len(methods_keywords_df)} rows in methods_keywords_df")

# Create edges between datasets and keywords
print("Creating edges between datasets and keywords...")
datasets_keywords_df = (datasets_df[['id', 'description_keywords']]
                        .rename(columns={'description_keywords': 'keywords'})
                      .explode('keywords')
                      .dropna()
                      .assign(keyword=lambda df: df['keywords'].str[0], score=lambda df: df['keywords'].str[1])
                      .drop(columns=['keywords'])
                      .assign(keyword_id=lambda df: ("keyword/" + df['keyword']).apply(lambda x: str(int(hashlib.md5(x.encode('utf-8')).hexdigest(), 16))))
                      .drop(columns=['keyword'])
                      .rename(columns={'id': 'dataset_id'})
                      .reindex(columns=['dataset_id', 'keyword_id', 'score'])
                      .reset_index(drop=True))
print(f"[EDGES] Got {len(datasets_keywords_df)} rows in datasets_keywords_df")

# Create edges between tasks and keywords
print("Creating edges between tasks and keywords...")
tasks_keywords_df = (tasks_df[['id_md5', 'description_keywords']]
                        .rename(columns={'description_keywords': 'keywords'})
                      .explode('keywords')
                      .dropna()
                      .assign(keyword=lambda df: df['keywords'].str[0], score=lambda df: df['keywords'].str[1])
                      .drop(columns=['keywords'])
                      .assign(keyword_id=lambda df: ("keyword/" + df['keyword']).apply(lambda x: str(int(hashlib.md5(x.encode('utf-8')).hexdigest(), 16))))
                      .drop(columns=['keyword'])
                      .rename(columns={'id_md5': 'task_id'})
                      .reindex(columns=['task_id', 'keyword_id', 'score'])
                      .reset_index(drop=True))
print(f"[EDGES] Got {len(tasks_keywords_df)} rows in tasks_keywords_df")

# Save the keywords and the papers_keywords_df to PWC_PROCESSED_JSON_PATH
print("Saving keywords and the papers_keywords_df to PWC_PROCESSED_JSON_PATH...")
keywords_df.to_json(PWC_PROCESSED_JSON_PATH + "keywords.json", orient="records")
papers_title_keywords_df.to_json(PWC_PROCESSED_JSON_PATH + "papers_title_keywords_df.json", orient="records")
papers_abstract_keywords_df.to_json(PWC_PROCESSED_JSON_PATH + "papers_abstract_keywords_df.json", orient="records")
methods_keywords_df.to_json(PWC_PROCESSED_JSON_PATH + "methods_keywords.json", orient="records")
datasets_keywords_df.to_json(PWC_PROCESSED_JSON_PATH + "datasets_keywords.json", orient="records")
tasks_keywords_df.to_json(PWC_PROCESSED_JSON_PATH + "tasks_keywords.json", orient="records")
print("Saved keywords and the papers_keywords_df to PWC_PROCESSED_JSON_PATH!")

Grabbing all keywords from all dataframes...
Got 4395001 keywords from all dataframes!
Got 971414 unique keywords from all dataframes (Around 22.10270259324173%).
[NODES] Got 971414 rows in keywords_df
Creating edges between methods and keywords...
[EDGES] Got 11125 rows in methods_keywords_df
Creating edges between datasets and keywords...
[EDGES] Got 47412 rows in datasets_keywords_df
Creating edges between tasks and keywords...
[EDGES] Got 6692 rows in tasks_keywords_df
Saving keywords and the papers_keywords_df to PWC_PROCESSED_JSON_PATH...
Saved keywords and the papers_keywords_df to PWC_PROCESSED_JSON_PATH!


In [6]:
from sentence_transformers import SentenceTransformer

# Calculate embeddings for each keyword and store them as features in the keywords_df
print("Calculating embeddings for each keyword and store them as features in the keywords_df...")
# Load the SentenceTransformer model
model = SentenceTransformer('malteos/SciNCL')
# Calculate the embeddings for each keyword
keyword_embeddings = model.encode(keywords_df.keyword.tolist(), show_progress_bar=True)
# Add the embeddings to the keywords_df
keywords_df["embedding"] = keyword_embeddings.tolist()
print("Done calculating embeddings for each keyword and store them as features in the keywords_df!")

Calculating embeddings for each keyword and store them as features in the keywords_df...


No sentence-transformers model found with name /home/wilinski/.cache/torch/sentence_transformers/malteos_SciNCL. Creating a new one with MEAN pooling.


Batches:   0%|          | 0/30357 [00:00<?, ?it/s]

Done calculating embeddings for each keyword and store them as features in the keywords_df!


In [7]:
# Create CSV importable files form neo4j
print("Creating CSV importable files for neo4j...")

# Change the column names slightly to match the neo4j import format https://neo4j.com/docs/operations-manual/current/tutorial/neo4j-admin-import/
# # papers_df
# papers_df = papers_df.rename(columns={"id": "id:ID"})
# papers_df = papers_df[["id:ID"] + [col for col in papers_df.columns if col != "id:ID"]]
# papers_df[':LABEL'] = "Paper"
# methods_df
methods_df = methods_df.rename(columns={"id": "id:ID"})
methods_df['id:ID'] = methods_df['id:ID'].astype(str)
methods_df = methods_df[["id:ID"] + [col for col in methods_df.columns if col != "id:ID"]]
methods_df[':LABEL'] = "Method"
# repos_df
repos_df = repos_df.rename(columns={"id": "id:ID"})
repos_df['id:ID'] = repos_df['id:ID'].astype(str)
repos_df = repos_df[["id:ID"] + [col for col in repos_df.columns if col != "id:ID"]]
repos_df[':LABEL'] = "Repo"
# areas_df
areas_df = areas_df.rename(columns={"id_md5": "id:ID"})
areas_df['id:ID'] = areas_df['id:ID'].astype(str)
areas_df = areas_df[["id:ID"] + [col for col in areas_df.columns if col != "id:ID"]]
areas_df[':LABEL'] = "Area"
# tasks_df
tasks_df = tasks_df.rename(columns={"id_md5": "id:ID"})
tasks_df['id:ID'] = tasks_df['id:ID'].astype(str)
tasks_df = tasks_df[["id:ID"] + [col for col in tasks_df.columns if col != "id:ID"]]
tasks_df[':LABEL'] = "Task"
# datasets_df
datasets_df = datasets_df.rename(columns={"id": "id:ID"})
datasets_df['id:ID'] = datasets_df['id:ID'].astype(str)
datasets_df = datasets_df[["id:ID"] + [col for col in datasets_df.columns if col != "id:ID"]]
datasets_df[':LABEL'] = "Dataset"
# keywords_df
keywords_df = keywords_df.rename(columns={"id": "id:ID"})
keywords_df['id:ID'] = keywords_df['id:ID'].astype(str)
keywords_df = keywords_df[["id:ID"] + [col for col in keywords_df.columns if col != "id:ID"]]
keywords_df[':LABEL'] = "Keyword"

# datasets_tasks_df
datasets_tasks_df = datasets_tasks_df.rename(columns={"dataset_id": ":START_ID", "task_id_md5": ":END_ID"})
datasets_tasks_df[':START_ID'] = datasets_tasks_df[':START_ID'].astype(str)
datasets_tasks_df[':END_ID'] = datasets_tasks_df[':END_ID'].astype(str)
# datasets_tasks_df = datasets_tasks_df.drop(columns=['dataset_id'])
datasets_tasks_df[':TYPE'] = "HAS_TASK"
# papers_tasks_df
papers_tasks_df = papers_tasks_df.rename(columns={"paper_id": ":START_ID", "task_id_md5": ":END_ID"})
papers_tasks_df[':START_ID'] = papers_tasks_df[':START_ID'].astype(str)
papers_tasks_df[':END_ID'] = papers_tasks_df[':END_ID'].astype(str)
# papers_tasks_df = papers_tasks_df.drop(columns=['task_id'])
papers_tasks_df[':TYPE'] = "HAS_TASK"
# papers_methods_df
papers_methods_df = papers_methods_df.rename(columns={"paper_id": ":START_ID", "method_id_md5": ":END_ID"})
papers_methods_df[':START_ID'] = papers_methods_df[':START_ID'].astype(str)
papers_methods_df[':END_ID'] = papers_methods_df[':END_ID'].astype(str)
# papers_methods_df = papers_methods_df.drop(columns=['method_id'])
papers_methods_df[':TYPE'] = "HAS_METHOD"
# papers_repos_df
papers_repos_df = papers_repos_df.rename(columns={"paper_id": ":START_ID", "repo_id": ":END_ID"})
papers_repos_df[':START_ID'] = papers_repos_df[':START_ID'].astype(str)
papers_repos_df[':END_ID'] = papers_repos_df[':END_ID'].astype(str)
papers_repos_df[':TYPE'] = "HAS_REPO"
# tasks_areas_df
tasks_areas_df = tasks_areas_df.rename(columns={"id": ":START_ID", "area": ":END_ID"})
tasks_areas_df[':START_ID'] = tasks_areas_df[':START_ID'].astype(str)
tasks_areas_df[':END_ID'] = tasks_areas_df[':END_ID'].astype(str)
tasks_areas_df[':TYPE'] = "HAS_AREA"
# papers_title_keywords_df
papers_title_keywords_df = papers_title_keywords_df.rename(columns={"paper_id": ":START_ID", "keyword_id": ":END_ID"})
papers_title_keywords_df[':START_ID'] = papers_title_keywords_df[':START_ID'].astype(str)
papers_title_keywords_df[':END_ID'] = papers_title_keywords_df[':END_ID'].astype(str)
papers_title_keywords_df['source'] = "title"
papers_title_keywords_df[':TYPE'] = "HAS_KEYWORD"
# papers_abstract_keywords_df
papers_abstract_keywords_df = papers_abstract_keywords_df.rename(columns={"paper_id": ":START_ID", "keyword_id": ":END_ID"})
papers_abstract_keywords_df[':START_ID'] = papers_abstract_keywords_df[':START_ID'].astype(str)
papers_abstract_keywords_df[':END_ID'] = papers_abstract_keywords_df[':END_ID'].astype(str)
papers_abstract_keywords_df['source'] = "abstract"
papers_abstract_keywords_df[':TYPE'] = "HAS_KEYWORD"
# methods_keywords_df
methods_keywords_df = methods_keywords_df.rename(columns={"method_id": ":START_ID", "keyword_id": ":END_ID"})
methods_keywords_df[':START_ID'] = methods_keywords_df[':START_ID'].astype(str)
methods_keywords_df[':END_ID'] = methods_keywords_df[':END_ID'].astype(str)
methods_keywords_df['source'] = "description"
methods_keywords_df[':TYPE'] = "HAS_KEYWORD"
# datasets_keywords_df
datasets_keywords_df = datasets_keywords_df.rename(columns={"dataset_id": ":START_ID", "keyword_id": ":END_ID"})
datasets_keywords_df[':START_ID'] = datasets_keywords_df[':START_ID'].astype(str)
datasets_keywords_df[':END_ID'] = datasets_keywords_df[':END_ID'].astype(str)
datasets_keywords_df['source'] = "description"
datasets_keywords_df[':TYPE'] = "HAS_KEYWORD"
# tasks_keywords_df
tasks_keywords_df = tasks_keywords_df.rename(columns={"task_id": ":START_ID", "keyword_id": ":END_ID"})
tasks_keywords_df[':START_ID'] = tasks_keywords_df[':START_ID'].astype(str)
tasks_keywords_df[':END_ID'] = tasks_keywords_df[':END_ID'].astype(str)
tasks_keywords_df['source'] = "description"
tasks_keywords_df[':TYPE'] = "HAS_KEYWORD"
print("Done creating CSV importable files for neo4j!")

# Save the dataframes to csv files into the neo4j import folder
print("Saving CSV files to neo4j import folder...")
# papers_df.to_csv(NEO4J_PATH + "papers.csv", index=False)
methods_df.to_csv(NEO4J_PATH + "methods.csv", index=False)
repos_df.to_csv(NEO4J_PATH + "repos.csv", index=False)
areas_df.to_csv(NEO4J_PATH + "areas.csv", index=False)
tasks_df.to_csv(NEO4J_PATH + "tasks.csv", index=False)
datasets_df.to_csv(NEO4J_PATH + "datasets.csv", index=False)
datasets_tasks_df.to_csv(NEO4J_PATH + "datasets_tasks.csv", index=False)
papers_tasks_df.to_csv(NEO4J_PATH + "papers_tasks.csv", index=False)
papers_methods_df.to_csv(NEO4J_PATH + "papers_methods.csv", index=False)
papers_repos_df.to_csv(NEO4J_PATH + "papers_repos.csv", index=False)
tasks_areas_df.to_csv(NEO4J_PATH + "tasks_areas.csv", index=False)
keywords_df.to_csv(NEO4J_PATH + "keywords.csv", index=False)
papers_title_keywords_df.to_csv(NEO4J_PATH + "papers_title_keywords.csv", index=False)
papers_abstract_keywords_df.to_csv(NEO4J_PATH + "papers_abstract_keywords.csv", index=False)
methods_keywords_df.to_csv(NEO4J_PATH + "methods_keywords.csv", index=False)
datasets_keywords_df.to_csv(NEO4J_PATH + "datasets_keywords.csv", index=False)
tasks_keywords_df.to_csv(NEO4J_PATH + "tasks_keywords.csv", index=False)
print("Done saving CSV files to neo4j import folder!")

Creating CSV importable files for neo4j...
Done creating CSV importable files for neo4j!
Saving CSV files to neo4j import folder...
Done saving CSV files to neo4j import folder!


In [8]:
# Create a neo4j admin-import command to import the data into neo4j
print("Creating neo4j admin-import command...")
print("")
# Look into the neo4j folder and list all csv files that have no underscore in their name
node_csv_files = [file for file in os.listdir(NEO4J_PATH) if file.endswith(".csv") and "_" not in file]
relationship_csv_files = [file for file in os.listdir(NEO4J_PATH) if file.endswith(".csv") and "_" in file]
node_csv_files = [file for file in node_csv_files if not file.startswith("._")] # Filter out all files that start with ._ (Mac OS hidden files)
relationship_csv_files = [file for file in relationship_csv_files if not file.startswith("._")] # Filter out all files that start with ._ (Mac OS hidden files)
neo4j_command_start = "bin/neo4j-admin database import full "
neo4j_command_end = f"--multiline-fields --read-buffer-size=8000000 --skip-bad-relationships --bad-tolerance=100000000 --skip-duplicate-nodes"
if REPLACE_DB:
    neo4j_command_end += " --overwrite-destination"
    
# Add a "--nodes=import/..." for each node csv file
neo4j_command_nodes = ""
for node_csv_file in node_csv_files:
    neo4j_command_nodes += f"--nodes=import/{node_csv_file} "
    
# Add a "--relationships=import/..." for each relationship csv file
neo4j_command_relationships = ""
for relationship_csv_file in relationship_csv_files:
    neo4j_command_relationships += f"--relationships=import/{relationship_csv_file} "
    
# Combine the command parts
command = neo4j_command_start + neo4j_command_nodes + neo4j_command_relationships + neo4j_command_end + f" {NEO4J_DB_NAME}"
print("=== COMMAND ===")
print("")
print(command)
print("")
print("=== END COMMAND ===")
print("")

# Write a txt file called "neo4j_import_command.txt" with the command in the neo4j folder
print("Writing a txt file called 'neo4j_import_command.txt' with the command in the neo4j folder...")
with open(NEO4J_PATH + "neo4j_import_command.txt", "w") as text_file:
    text_file.write(command)

print("Done creating neo4j admin-import command!")

Creating neo4j admin-import command...

=== COMMAND ===

bin/neo4j-admin database import full --nodes=import/authors.csv --nodes=import/institutions.csv --nodes=import/fulltexts.csv --nodes=import/papers.csv --nodes=import/methods.csv --nodes=import/repos.csv --nodes=import/areas.csv --nodes=import/tasks.csv --nodes=import/datasets.csv --nodes=import/keywords.csv --relationships=import/authors_papers.csv --relationships=import/authors_institutions.csv --relationships=import/papers_citations.csv --relationships=import/papers_fulltexts.csv --relationships=import/datasets_tasks.csv --relationships=import/papers_tasks.csv --relationships=import/papers_methods.csv --relationships=import/papers_repos.csv --relationships=import/tasks_areas.csv --relationships=import/papers_keywords.csv --relationships=import/methods_keywords.csv --relationships=import/datasets_keywords.csv --relationships=import/tasks_keywords.csv --relationships=import/papers_title_keywords.csv --relationships=import/papers_

# Importing the CSVs into Neo4j
Now is the time for you to copy the CSV files into the import folder of your neo4j installation (either local or server/docker). Grab the import command from the notebook and run it in the neo4j terminal. The import should not take long.

Adhere to the following steps:
1. Stop the server
2. Copy the CSV files into the import folder of your DBMS
3. Run the admin -import tool and use a new database name (e.g. patentsview). You can also choose an existing one but with the flag --overwrite-destination.
4. Start the server
5. In the console switch to the System database using the pulldown control
6. Run the following command at the system prompt: create database aDatabaseName (should be the same name set in #2 above)
7. Switch to the database just created using the console pulldown control

Hint: You can skip steps 5 to 7 if you are on the desktop version. Simply create a new database in the desktop version and then switch to it.