diff --git a/api/api.py b/api/api.py
index ff353a8..707fa7c 100644
--- a/api/api.py
+++ b/api/api.py
@@ -10,6 +10,60 @@
     predict_multilabel_sklearn,
 )
 
+minor_cats_v5 = [  # label names passed as labels= to predict_multilabel_sklearn in predict()
+    "Gratitude/ good experience",
+    "Negative experience",
+    "Not assigned",
+    "Organisation & efficiency",
+    "Funding & use of financial resources",
+    "Non-specific praise for staff",
+    "Non-specific dissatisfaction with staff",
+    "Staff manner & personal attributes",
+    "Number & deployment of staff",
+    "Staff responsiveness",
+    "Staff continuity",
+    "Competence & training",
+    "Unspecified communication",
+    "Staff listening, understanding & involving patients",
+    "Information directly from staff during care",
+    "Information provision & guidance",
+    "Being kept informed, clarity & consistency of information",
+    "Service involvement with family/ carers",
+    "Patient contact with family/ carers",
+    "Contacting services",
+    "Appointment arrangements",
+    "Appointment method",
+    "Timeliness of care",
+    "Pain management",
+    "Diagnosis & triage",
+    "Referals & continuity of care",
+    "Length of stay/ duration of care",
+    "Discharge",
+    "Care plans",
+    "Patient records",
+    "Links with non-NHS organisations",
+    "Cleanliness, tidiness & infection control",
+    "Safety & security",
+    "Provision of medical equipment",
+    "Service location",
+    "Transport to/ from services",
+    "Parking",
+    "Electronic entertainment",
+    "Feeling safe",
+    "Patient appearance & grooming",
+    "Mental Health Act",
+    "Equality, Diversity & Inclusion",
+    "Admission",
+    "Collecting patients feedback",
+    "Labelling not possible",
+    "Environment & Facilities",
+    "Supplying & understanding medication",
+    "Activities & access to fresh air",
+    "Food & drink provision & facilities",
+    "Sensory experience",
+    "Impact of treatment/ care",
+]
+
 description = """
 This API is for classifying patient experience qualitative data,
 utilising the models trained as part of the pxtextmining project.
@@ -28,11 +82,8 @@ class Test(BaseModel):
     test: str
 
     class Config:
-        schema_extra = {
-            "example": {
-                "test": "Hello"
-            }
-        }
+        schema_extra = {"example": {"test": "Hello"}}
+
 
 class ItemIn(BaseModel):
     comment_id: str
@@ -85,16 +136,16 @@ class Config:
         "name": "MIT License",
         "url": "https://github.com/CDU-data-science-team/pxtextmining/blob/main/LICENSE",
     },
-    openapi_tags=tags_metadata
+    openapi_tags=tags_metadata,
 )
 
 
-@app.get("/", response_model=Test, tags=['index'])
+@app.get("/", response_model=Test, tags=["index"])
 def index():
     return {"test": "Hello"}
 
 
-@app.post("/predict_multilabel", response_model=List[ItemOut], tags=['predict'])
+@app.post("/predict_multilabel", response_model=List[ItemOut], tags=["predict"])
 def predict(items: List[ItemIn]):
     """Accepts comment ids, comment text and question type as JSON in a POST request. Makes predictions using trained SVC model.
 
@@ -128,7 +179,7 @@ def predict(items: List[ItemIn]):
     with open(model_path, "rb") as model:
         loaded_model = pickle.load(model)
     preds_df = predict_multilabel_sklearn(
-        text_to_predict, loaded_model, additional_features=True
+        text_to_predict, loaded_model, labels=minor_cats_v5, additional_features=True
     )
     # Join predicted labels with received data
     preds_df["comment_id"] = preds_df.index.astype(str)
diff --git a/pxtextmining/factories/factory_data_load_and_split.py b/pxtextmining/factories/factory_data_load_and_split.py
index 1b0fa88..1ae663c 100644
--- a/pxtextmining/factories/factory_data_load_and_split.py
+++ b/pxtextmining/factories/factory_data_load_and_split.py
@@ -120,7 +120,6 @@ def load_multilabel_data(filename, target="major_categories"):
         cols = ["Comment sentiment"]
     # Sort out the features first
     features_df = raw_data.loc[:, features].copy()
-    features_df = clean_empty_features(features_df)
     # Standardize FFT qs
     features_df['FFT question'] = features_df['FFT question'].fillna('nonspecific')
     features_df.loc[:, "FFT_q_standardised"] = (
@@ -135,6 +134,7 @@ def load_multilabel_data(filename, target="major_categories"):
     features_df.loc[:, "text_length"] = features_df.loc[:, "FFT answer"].apply(
         lambda x: len([word for word in str(x).split(" ") if word != ""])
     )
+    features_df = clean_empty_features(features_df)
     # Sort out the targets
     targets_df = raw_data.loc[:, cols].copy()
     targets_df = targets_df.replace("1", 1)
@@ -206,7 +206,11 @@ def process_data(df, target, preprocess_text=True, additional_features=False):
         X = clean_empty_features(X)
         print(f"After preprocessing, shape of X is {X.shape}")
     if preprocess_text == False:
-        X = df["FFT answer"].astype(str)
+        answers_as_str = df["FFT answer"].astype(str)
+        X_temp = clean_empty_features(answers_as_str.apply(remove_punc_and_nums))
+        print(f"After preprocessing, shape of X is {X_temp.shape}")
+        indices = X_temp.index  # index labels that survive clean_empty_features
+        X = answers_as_str.filter(indices)
     if additional_features == True:
         X = pd.merge(X, df[["FFT_q_standardised"]], left_index=True, right_index=True)
         X = X.reset_index()
diff --git a/pxtextmining/params.py b/pxtextmining/params.py
index e377390..b31698e 100644
--- a/pxtextmining/params.py
+++ b/pxtextmining/params.py
@@ -1,38 +1,36 @@
-dataset = "datasets/hidden/merged_20230602.csv"
+dataset = "datasets/hidden/merged_230608.csv"
 
-random_state = 99
+random_state = 42
 
 model_name = "distilbert-base-uncased"
 
 q_map = {
-        "Please tell us why": "nonspecific",
-        "Please tells us why you gave this answer?": "nonspecific",
-        "FFT Why?": "nonspecific",
-        "What was good?": "what_good",
-        "Is there anything we could have done better?": "could_improve",
-        "How could we improve?": "could_improve",
-        "What could we do better?": "could_improve",
-        "Please can you tell us why you gave your answer and what we could have done better?": "nonspecific",
-        "Please describe any things about the 111 service that\r\nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
-        "Please describe any things about the 111 service that \nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
-        "Please describe any things about the 111 service that\nyou were particularly satisfied and/or dissatisfied with": 'nonspecific',
-        "Nonspecific": 'nonspecific',
-        "nonspecific": 'nonspecific'
-    }
+    "Please tell us why": "nonspecific",
+    "Please tells us why you gave this answer?": "nonspecific",
+    "FFT Why?": "nonspecific",
+    "What was good?": "what_good",
+    "Is there anything we could have done better?": "could_improve",
+    "How could we improve?": "could_improve",
+    "What could we do better?": "could_improve",
+    "Please can you tell us why you gave your answer and what we could have done better?": "nonspecific",
+    "Please describe any things about the 111 service that\r\nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
+    "Please describe any things about the 111 service that \nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
+    "Please describe any things about the 111 service that\nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
+    "Nonspecific": "nonspecific",
+    "nonspecific": "nonspecific",
+}
 
+# v6
 major_cat_dict = {
     "General": [
         "Labelling not possible",
-        "Gratitude/ good experience",
-        "Negative experience",
+        "Positive experience & gratitude",
+        "Negative experience & dissatisfaction",
         "Not assigned",
         "Organisation & efficiency",
         "Funding & use of financial resources",
-        "Collecting patients feedback",
     ],
     "Staff": [
-        "Non-specific praise for staff",
-        "Non-specific dissatisfaction with staff",
         "Staff manner & personal attributes",
         "Number & deployment of staff",
         "Staff responsiveness",
@@ -45,8 +43,7 @@
         "Information directly from staff during care",
         "Information provision & guidance",
         "Being kept informed, clarity & consistency of information",
-        "Service involvement with family/ carers",
-        "Patient contact with family/ carers",
+        "Interaction with family/ carers",
     ],
     "Access to medical care & support": [
         "Contacting services",
@@ -59,12 +56,10 @@
         "Diagnosis & triage",
         "Referals & continuity of care",
         "Admission",
-        "Length of stay/ duration of care",
         "Discharge",
         "Care plans",
         "Patient records",
         "Impact of treatment/ care",
-        "Links with non-NHS organisations",
     ],
     "Food & diet": ["Food & drink provision & facilities"],
     "Category TBC": [
@@ -77,7 +72,6 @@
         "Cleanliness, tidiness & infection control",
         "Sensory experience",
         "Environment & Facilities",
-        "Safety & security",
         "Provision of medical equipment",
     ],
     "Mental Health specifics": ["Mental Health Act"],
@@ -93,12 +87,12 @@
 # v6 20230602
 merged_minor_cats = [
     "Gratitude/ good experience",
-#     "Negative experience",
+    #     "Negative experience",
     "Not assigned",
     "Organisation & efficiency",
-#     "Funding & use of financial resources",
+    #     "Funding & use of financial resources",
     "Non-specific praise for staff",
-#     "Non-specific dissatisfaction with staff",
+    #     "Non-specific dissatisfaction with staff",
     "Staff manner & personal attributes",
     "Number & deployment of staff",
     "Staff responsiveness",
@@ -109,8 +103,8 @@
     "Information directly from staff during care",
     "Information provision & guidance",
     "Being kept informed, clarity & consistency of information",
-#     "Service involvement with family/ carers",
-#     "Patient contact with family/ carers",
+    #     "Service involvement with family/ carers",
+    #     "Patient contact with family/ carers",
     "Contacting services",
     "Appointment arrangements",
     "Appointment method",
@@ -118,14 +112,14 @@
     "Pain management",
     "Diagnosis & triage",
     "Referals & continuity of care",
-#     "Length of stay/ duration of care",
+    #     "Length of stay/ duration of care",
     "Discharge",
     "Care plans",
-#     "Patient records",
-#     "Links with non-NHS organisations",
+    #     "Patient records",
+    #     "Links with non-NHS organisations",
     "Cleanliness, tidiness & infection control",
     "Safety & security",
-#     "Provision of medical equipment",
+    #     "Provision of medical equipment",
     "Service location",
     "Transport to/ from services",
     "Parking",
@@ -135,7 +129,7 @@
     "Mental Health Act",
     "Equality, Diversity & Inclusion",
     "Admission",
-#     "Collecting patients feedback",
+    #     "Collecting patients feedback",
     "Labelling not possible",
     "Environment & Facilities",
     "Supplying & understanding medication",
@@ -144,18 +138,14 @@
     "Sensory experience",
     "Impact of treatment/ care",
     "Negative experience/ dissatisfaction",
-    "Family/ carers"
+    "Family/ carers",
 ]
 
-# v5 20230419
+# v6 20230806
 minor_cats = [
-    "Gratitude/ good experience",
-    "Negative experience",
     "Not assigned",
     "Organisation & efficiency",
     "Funding & use of financial resources",
-    "Non-specific praise for staff",
-    "Non-specific dissatisfaction with staff",
     "Staff manner & personal attributes",
     "Number & deployment of staff",
     "Staff responsiveness",
@@ -166,8 +156,6 @@
     "Information directly from staff during care",
     "Information provision & guidance",
     "Being kept informed, clarity & consistency of information",
-    "Service involvement with family/ carers",
-    "Patient contact with family/ carers",
     "Contacting services",
     "Appointment arrangements",
     "Appointment method",
@@ -175,13 +163,10 @@
     "Pain management",
     "Diagnosis & triage",
     "Referals & continuity of care",
-    "Length of stay/ duration of care",
     "Discharge",
     "Care plans",
     "Patient records",
-    "Links with non-NHS organisations",
     "Cleanliness, tidiness & infection control",
-    "Safety & security",
     "Provision of medical equipment",
     "Service location",
     "Transport to/ from services",
@@ -192,7 +177,6 @@
     "Mental Health Act",
     "Equality, Diversity & Inclusion",
     "Admission",
-    "Collecting patients feedback",
     "Labelling not possible",
     "Environment & Facilities",
     "Supplying & understanding medication",
@@ -200,6 +184,14 @@
     "Food & drink provision & facilities",
     "Sensory experience",
     "Impact of treatment/ care",
+    # "Psychological therapy arrangements",
+    # "Existence of services",
+    # "Choice of services",
+    # "Out of hours support (community services)",
+    # "Learning organisation",
+    "Interaction with family/ carers",
+    "Negative experience & dissatisfaction",
+    "Positive experience & gratitude",
 ]
 
 sentiment_dict = {
diff --git a/pxtextmining/pipelines/multilabel_pipeline.py b/pxtextmining/pipelines/multilabel_pipeline.py
index cc1df35..b19fe7c 100644
--- a/pxtextmining/pipelines/multilabel_pipeline.py
+++ b/pxtextmining/pipelines/multilabel_pipeline.py
@@ -357,15 +357,16 @@ def run_two_layer_sklearn_pipeline(
 
 
 if __name__ == "__main__":
-    run_bert_pipeline(additional_features = True, path = 'test_multilabel/merged_cats_0607', target = merged_minor_cats)
-    run_sklearn_pipeline(additional_features = True, target= merged_minor_cats, models_to_try = ["xgb"], path = 'test_multilabel/merged_cats_0607/xgb',
-                         include_analysis=True)
-    run_svc_pipeline(
-        additional_features=True,
-        target=merged_minor_cats,
-        path="test_multilabel/merged_cats_0607/svc",
-        include_analysis=True
-    )
-    run_sklearn_pipeline(additional_features = True, target= merged_minor_cats, models_to_try = ["svm"], path = 'test_multilabel/merged_cats_0607/svc_2',
-                         include_analysis=True)
+
+    # run_sklearn_pipeline(additional_features = True, target= minor_cats, models_to_try = ["xgb"], path = 'test_multilabel/v6_230806/xgb',
+    #                      include_analysis=True)
+    # run_svc_pipeline(
+    #     additional_features=True,
+    #     target=minor_cats,
+    #     path="test_multilabel/v6_230806/svc",
+    #     include_analysis=True
+    # )
+    run_bert_pipeline(target=minor_cats, path='test_multilabel/v6_230806', additional_features=True, include_analysis=True)
+    # run_sklearn_pipeline(additional_features = True, target= minor_cats, models_to_try = ["svm"], path = 'test_multilabel/v6_230806/svc_2',
+    #                      include_analysis=True)
     # run_two_layer_sklearn_pipeline()
diff --git a/tests/conftest.py b/tests/conftest.py
index 81158c6..700d8a1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,6 +2,10 @@
 import pandas as pd
 import numpy as np
 from unittest.mock import Mock
+from pxtextmining.params import minor_cats, q_map
+import random
+import string
+
 
 @pytest.fixture
 def grab_test_X_additional_feats():
@@ -22,848 +26,52 @@ def grab_test_X_additional_feats():
         },
     }
     text_X_additional_feats = pd.DataFrame(data_dict)
-    text_X_additional_feats.index.name = 'Comment ID'
+    text_X_additional_feats.index.name = "Comment ID"
     return text_X_additional_feats
 
 
 @pytest.fixture
 def mock_read_csv(mocker, test_raw_data):
     mock = Mock()
-    mocker.patch('pandas.read_csv', return_value=test_raw_data)
+    mocker.patch("pandas.read_csv", return_value=test_raw_data)
     return mock
 
 
 @pytest.fixture
 def test_raw_data():
-    data_dict = {
-        "Comment ID": {
-            0: "Q1",
-            1: "Q2",
-            2: "Q3",
-            3: "Q4",
-            4: "Q5",
-            5: "Q6",
-            6: "Q7",
-            7: "Q8",
-            8: "Q9",
-            9: "Q10",
-        },
-        "Trust": {
-            0: "Trust A",
-            1: "Trust A",
-            2: "Trust A",
-            3: "Trust A",
-            4: "Trust A",
-            5: "Trust A",
-            6: "Trust A",
-            7: "Trust A",
-            8: "Trust A",
-            9: "Trust A",
-        },
-        "Respondent ID": {
-            0: "Anon",
-            1: "Anon",
-            2: "Anon",
-            3: "Anon",
-            4: "Anon",
-            5: "Anon",
-            6: "Anon",
-            7: "Anon",
-            8: "Anon",
-            9: "Anon",
-        },
-        "Date": {
-            0: "30/09/2021",
-            1: "30/09/2021",
-            2: "30/09/2021",
-            3: "30/09/2021",
-            4: "30/09/2021",
-            5: "30/09/2021",
-            6: "30/09/2021",
-            7: "30/09/2021",
-            8: "30/09/2021",
-            9: "30/09/2021",
-        },
-        "Service type 1": {
-            0: "Department A",
-            1: "Department A",
-            2: "Department A",
-            3: "Department A",
-            4: "Department A",
-            5: "Department A",
-            6: "Department A",
-            7: "Department A",
-            8: "Department A",
-            9: "Department A",
-        },
-        "Service type 2": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "FFT categorical answer": {
-            0: 1,
-            1: 1,
-            2: 1,
-            3: 1,
-            4: 2,
-            5: 1,
-            6: 1,
-            7: 1,
-            8: 2,
-            9: 2,
-        },
-        "FFT question": {
-            0: "FFT Why?",
-            1: "FFT Why?",
-            2: "FFT Why?",
-            3: "FFT Why?",
-            4: "FFT Why?",
-            5: "FFT Why?",
-            6: "FFT Why?",
-            7: "FFT Why?",
-            8: "FFT Why?",
-            9: "FFT Why?",
-        },
-        "FFT answer": {
-            0: "Did not answer",
-            1: "Nothing.",
-            2: "Did not answer",
-            3: "None.",
-            4: "Cue waiting time by an hour.",
-            5: "Did not answer",
-            6: "None. Quite satisfied.",
-            7: "Cut the waiting time from 2 hours before appointment to 1 hour depending which hospital.",
-            8: "Nothing at all.",
-            9: "Nothing.",
-        },
-        "Person identifiable info?": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Comment sentiment": {
-            0: np.NaN,
-            1: 1.0,
-            2: np.NaN,
-            3: 1.0,
-            4: 3.0,
-            5: np.NaN,
-            6: 2.0,
-            7: 3.0,
-            8: 1.0,
-            9: 1.0,
-        },
-        "Gratitude/ good experience": {
-            0: np.NaN,
-            1: 1.0,
-            2: np.NaN,
-            3: 1.0,
-            4: np.NaN,
-            5: np.NaN,
-            6: 1.0,
-            7: np.NaN,
-            8: 1.0,
-            9: 1.0,
-        },
-        "Negative experience": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Not assigned": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Organisation & efficiency": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Funding & use of financial resources": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Non-specific praise for staff": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Non-specific dissatisfaction with staff": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Staff manner & personal attributes": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Number & deployment of staff": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Staff responsiveness": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Staff continuity": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Competence & training": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Unspecified communication": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Staff listening, understanding & involving patients": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Information directly from staff during care": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Information provision & guidance": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Being kept informed, clarity & consistency of information": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Service involvement with family/ carers": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Patient contact with family/ carers": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Contacting services": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Appointment arrangements": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: 1.0,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Appointment method": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Timeliness of care": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: 1.0,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Pain management": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Diagnosis & triage": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Referals & continuity of care": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Length of stay/ duration of care": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Discharge": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Care plans": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Patient records": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Links with non-NHS organisations": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Cleanliness, tidiness & infection control": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Safety & security": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Provision of medical equipment": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Service location": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Transport to/ from services": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Parking": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Electronic entertainment": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Feeling safe": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Patient appearance & grooming": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Mental Health Act": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Equality, Diversity & Inclusion": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Admission": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Collecting patients feedback": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Column to outline any additional comment content which doesn't map to a topic.": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Safeguarding concerns?": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Labelling not possible": {
-            0: 1.0,
-            1: 0.0,
-            2: 1.0,
-            3: 0.0,
-            4: 0.0,
-            5: 1.0,
-            6: 0.0,
-            7: 0.0,
-            8: 0.0,
-            9: 0.0,
-        },
-        "Environment & Facilities": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Supplying & understanding medication": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Activities & access to fresh air": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Food & drink provision & facilities": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Sensory experience": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Impact of treatment/ care": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Psychological therapy arrangements": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Existence of services": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Choice of services": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Out of hours support (community services)": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-        "Learning organisation": {
-            0: np.NaN,
-            1: np.NaN,
-            2: np.NaN,
-            3: np.NaN,
-            4: np.NaN,
-            5: np.NaN,
-            6: np.NaN,
-            7: np.NaN,
-            8: np.NaN,
-            9: np.NaN,
-        },
-    }
+    cols = [
+        "Comment ID",
+        "Trust",
+        "Respondent ID",
+        "Date",
+        "Service Type 1",
+        "Service type 2",
+        "FFT categorical answer",
+        "FFT question",
+        "FFT answer",
+        "Comment sentiment",
+    ]
+    cols.extend(minor_cats)
+    data_dict = {}
+    for col in cols:
+        row = []
+        if col not in minor_cats:
+            if col in ["FFT categorical answer", "Comment sentiment"]:
+                for i in range(5):
+                    row.append(random.randint(1, 5))
+            elif col == "FFT question":
+                for i in range(5):
+                    row.append(random.choice(list(q_map.keys())))
+            else:
+                for i in range(5):
+                    row.append(
+                        "".join(
+                            random.choices(string.ascii_uppercase + string.digits, k=5)
+                        )
+                    )
+        else:
+            for i in range(5):
+                row.append(random.choice([np.NaN, 1]))
+        data_dict[col] = row
     data = pd.DataFrame(data_dict)
     return data