diff --git a/data/__init__.py b/data/__init__.py
new file mode 100644
index 0000000..ff32f22
--- /dev/null
+++ b/data/__init__.py
@@ -0,0 +1,37 @@
+"""Helper functions for loading data into multiple different representations."""
+from ast import literal_eval
+from collections import Counter
+from pathlib import Path
+
+import pandas as pd
+
+
+def get_df():
+    """Get the dataset unmodified in a pandas.DataFrame."""
+    return pd.read_csv(
+        Path(__file__).parent.joinpath("haikus.csv"),
+        index_col=0,
+        # Ensure that the list of lines is interpreted as a list, not a string...
+        converters={"haiku": literal_eval},
+    )
+
+
+def get_bag_of_words():
+    """Get the dataset in a bag of words representation."""
+    df = get_df()
+    bag = Counter()
+    for haiku in df["haiku"]:
+        for line in haiku:
+            bag.update(line.split())
+
+    return bag
+
+
+def get_bag_of_lines():
+    """Get the dataset in a bag of lines representation."""
+    df = get_df()
+    lines = []
+    for haiku in df["haiku"]:
+        for line in haiku:
+            lines.append(line)
+    return Counter(lines)
diff --git a/experiments/eda/words.ipynb b/experiments/eda/words.ipynb
index adbbb8e..3949a70 100644
--- a/experiments/eda/words.ipynb
+++ b/experiments/eda/words.ipynb
@@ -12,6 +12,49 @@
     "* Potentially use to clean the dataset\n",
     "* British spellings"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "%aimport data\n",
+    "\n",
+    "%config InlineBackend.figure_format = 'svg'\n",
+    "%matplotlib inline\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "sns.set()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bag = data.get_bag_of_words()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {