Skip to content

Commit

Permalink
Merge pull request #108 from CDU-data-science-team/107_v6_framework
Browse files Browse the repository at this point in the history
107 v6 framework
  • Loading branch information
yiwen-h committed Jun 12, 2023
2 parents 871252a + eb7dcf3 commit e078e12
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 902 deletions.
69 changes: 60 additions & 9 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,60 @@
predict_multilabel_sklearn,
)

# Label names handed to predict_multilabel_sklearn via `labels=` in the
# /predict_multilabel endpoint of this module.
# NOTE(review): the "_v5" suffix suggests this is the v5 minor-category set that
# the pickled model loaded by the endpoint was trained on — confirm.
# NOTE(review): presumably the order must line up with the model's output
# columns; verify before reordering or editing any entry (the strings are
# runtime labels, so even spelling fixes would change the API output).
minor_cats_v5 = [
"Gratitude/ good experience",
"Negative experience",
"Not assigned",
"Organisation & efficiency",
"Funding & use of financial resources",
"Non-specific praise for staff",
"Non-specific dissatisfaction with staff",
"Staff manner & personal attributes",
"Number & deployment of staff",
"Staff responsiveness",
"Staff continuity",
"Competence & training",
"Unspecified communication",
"Staff listening, understanding & involving patients",
"Information directly from staff during care",
"Information provision & guidance",
"Being kept informed, clarity & consistency of information",
"Service involvement with family/ carers",
"Patient contact with family/ carers",
"Contacting services",
"Appointment arrangements",
"Appointment method",
"Timeliness of care",
"Pain management",
"Diagnosis & triage",
"Referals & continuity of care",
"Length of stay/ duration of care",
"Discharge",
"Care plans",
"Patient records",
"Links with non-NHS organisations",
"Cleanliness, tidiness & infection control",
"Safety & security",
"Provision of medical equipment",
"Service location",
"Transport to/ from services",
"Parking",
"Electronic entertainment",
"Feeling safe",
"Patient appearance & grooming",
"Mental Health Act",
"Equality, Diversity & Inclusion",
"Admission",
"Collecting patients feedback",
"Labelling not possible",
"Environment & Facilities",
"Supplying & understanding medication",
"Activities & access to fresh air",
"Food & drink provision & facilities",
"Sensory experience",
"Impact of treatment/ care",
]

description = """
This API is for classifying patient experience qualitative data,
utilising the models trained as part of the pxtextmining project.
Expand All @@ -28,11 +82,8 @@ class Test(BaseModel):
test: str

class Config:
schema_extra = {
"example": {
"test": "Hello"
}
}
schema_extra = {"example": {"test": "Hello"}}


class ItemIn(BaseModel):
comment_id: str
Expand Down Expand Up @@ -85,16 +136,16 @@ class Config:
"name": "MIT License",
"url": "https://github.com/CDU-data-science-team/pxtextmining/blob/main/LICENSE",
},
openapi_tags=tags_metadata
openapi_tags=tags_metadata,
)


@app.get("/", response_model=Test, tags=['index'])
@app.get("/", response_model=Test, tags=["index"])
def index():
return {"test": "Hello"}


@app.post("/predict_multilabel", response_model=List[ItemOut], tags=['predict'])
@app.post("/predict_multilabel", response_model=List[ItemOut], tags=["predict"])
def predict(items: List[ItemIn]):
"""Accepts comment ids, comment text and question type as JSON in a POST request. Makes predictions using trained SVC model.
Expand Down Expand Up @@ -128,7 +179,7 @@ def predict(items: List[ItemIn]):
with open(model_path, "rb") as model:
loaded_model = pickle.load(model)
preds_df = predict_multilabel_sklearn(
text_to_predict, loaded_model, additional_features=True
text_to_predict, loaded_model, labels=minor_cats_v5, additional_features=True
)
# Join predicted labels with received data
preds_df["comment_id"] = preds_df.index.astype(str)
Expand Down
8 changes: 6 additions & 2 deletions pxtextmining/factories/factory_data_load_and_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def load_multilabel_data(filename, target="major_categories"):
cols = ["Comment sentiment"]
# Sort out the features first
features_df = raw_data.loc[:, features].copy()
features_df = clean_empty_features(features_df)
# Standardize FFT qs
features_df['FFT question'] = features_df['FFT question'].fillna('nonspecific')
features_df.loc[:, "FFT_q_standardised"] = (
Expand All @@ -135,6 +134,7 @@ def load_multilabel_data(filename, target="major_categories"):
features_df.loc[:, "text_length"] = features_df.loc[:, "FFT answer"].apply(
lambda x: len([word for word in str(x).split(" ") if word != ""])
)
features_df = clean_empty_features(features_df)
# Sort out the targets
targets_df = raw_data.loc[:, cols].copy()
targets_df = targets_df.replace("1", 1)
Expand Down Expand Up @@ -206,7 +206,11 @@ def process_data(df, target, preprocess_text=True, additional_features=False):
X = clean_empty_features(X)
print(f"After preprocessing, shape of X is {X.shape}")
if preprocess_text == False:
X = df["FFT answer"].astype(str)
X_temp = df["FFT answer"].astype(str).apply(remove_punc_and_nums)
X_temp = clean_empty_features(X_temp)
print(f"After preprocessing, shape of X is {X_temp.shape}")
indices = X_temp.index
X = df["FFT answer"].astype(str).filter(indices)
if additional_features == True:
X = pd.merge(X, df[["FFT_q_standardised"]], left_index=True, right_index=True)
X = X.reset_index()
Expand Down
88 changes: 40 additions & 48 deletions pxtextmining/params.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,36 @@
dataset = "datasets/hidden/merged_20230602.csv"
dataset = "datasets/hidden/merged_230608.csv"

random_state = 99
random_state = 42

model_name = "distilbert-base-uncased"

q_map = {
"Please tell us why": "nonspecific",
"Please tells us why you gave this answer?": "nonspecific",
"FFT Why?": "nonspecific",
"What was good?": "what_good",
"Is there anything we could have done better?": "could_improve",
"How could we improve?": "could_improve",
"What could we do better?": "could_improve",
"Please can you tell us why you gave your answer and what we could have done better?": "nonspecific",
"Please describe any things about the 111 service that\r\nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
"Please describe any things about the 111 service that \nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
"Please describe any things about the 111 service that\nyou were particularly satisfied and/or dissatisfied with": 'nonspecific',
"Nonspecific": 'nonspecific',
"nonspecific": 'nonspecific'
}
"Please tell us why": "nonspecific",
"Please tells us why you gave this answer?": "nonspecific",
"FFT Why?": "nonspecific",
"What was good?": "what_good",
"Is there anything we could have done better?": "could_improve",
"How could we improve?": "could_improve",
"What could we do better?": "could_improve",
"Please can you tell us why you gave your answer and what we could have done better?": "nonspecific",
"Please describe any things about the 111 service that\r\nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
"Please describe any things about the 111 service that \nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
"Please describe any things about the 111 service that\nyou were particularly satisfied and/or dissatisfied with": "nonspecific",
"Nonspecific": "nonspecific",
"nonspecific": "nonspecific",
}

# v6
major_cat_dict = {
"General": [
"Labelling not possible",
"Gratitude/ good experience",
"Negative experience",
"Positive experience & gratitude",
"Negative experience & dissatisfaction",
"Not assigned",
"Organisation & efficiency",
"Funding & use of financial resources",
"Collecting patients feedback",
],
"Staff": [
"Non-specific praise for staff",
"Non-specific dissatisfaction with staff",
"Staff manner & personal attributes",
"Number & deployment of staff",
"Staff responsiveness",
Expand All @@ -45,8 +43,7 @@
"Information directly from staff during care",
"Information provision & guidance",
"Being kept informed, clarity & consistency of information",
"Service involvement with family/ carers",
"Patient contact with family/ carers",
"Interaction with family/ carers",
],
"Access to medical care & support": [
"Contacting services",
Expand All @@ -59,12 +56,10 @@
"Diagnosis & triage",
"Referals & continuity of care",
"Admission",
"Length of stay/ duration of care",
"Discharge",
"Care plans",
"Patient records",
"Impact of treatment/ care",
"Links with non-NHS organisations",
],
"Food & diet": ["Food & drink provision & facilities"],
"Category TBC": [
Expand All @@ -77,7 +72,6 @@
"Cleanliness, tidiness & infection control",
"Sensory experience",
"Environment & Facilities",
"Safety & security",
"Provision of medical equipment",
],
"Mental Health specifics": ["Mental Health Act"],
Expand All @@ -93,12 +87,12 @@
# v6 20230602
merged_minor_cats = [
"Gratitude/ good experience",
# "Negative experience",
# "Negative experience",
"Not assigned",
"Organisation & efficiency",
# "Funding & use of financial resources",
# "Funding & use of financial resources",
"Non-specific praise for staff",
# "Non-specific dissatisfaction with staff",
# "Non-specific dissatisfaction with staff",
"Staff manner & personal attributes",
"Number & deployment of staff",
"Staff responsiveness",
Expand All @@ -109,23 +103,23 @@
"Information directly from staff during care",
"Information provision & guidance",
"Being kept informed, clarity & consistency of information",
# "Service involvement with family/ carers",
# "Patient contact with family/ carers",
# "Service involvement with family/ carers",
# "Patient contact with family/ carers",
"Contacting services",
"Appointment arrangements",
"Appointment method",
"Timeliness of care",
"Pain management",
"Diagnosis & triage",
"Referals & continuity of care",
# "Length of stay/ duration of care",
# "Length of stay/ duration of care",
"Discharge",
"Care plans",
# "Patient records",
# "Links with non-NHS organisations",
# "Patient records",
# "Links with non-NHS organisations",
"Cleanliness, tidiness & infection control",
"Safety & security",
# "Provision of medical equipment",
# "Provision of medical equipment",
"Service location",
"Transport to/ from services",
"Parking",
Expand All @@ -135,7 +129,7 @@
"Mental Health Act",
"Equality, Diversity & Inclusion",
"Admission",
# "Collecting patients feedback",
# "Collecting patients feedback",
"Labelling not possible",
"Environment & Facilities",
"Supplying & understanding medication",
Expand All @@ -144,18 +138,14 @@
"Sensory experience",
"Impact of treatment/ care",
"Negative experience/ dissatisfaction",
"Family/ carers"
"Family/ carers",
]

# v5 20230419
# v6 20230608
minor_cats = [
"Gratitude/ good experience",
"Negative experience",
"Not assigned",
"Organisation & efficiency",
"Funding & use of financial resources",
"Non-specific praise for staff",
"Non-specific dissatisfaction with staff",
"Staff manner & personal attributes",
"Number & deployment of staff",
"Staff responsiveness",
Expand All @@ -166,22 +156,17 @@
"Information directly from staff during care",
"Information provision & guidance",
"Being kept informed, clarity & consistency of information",
"Service involvement with family/ carers",
"Patient contact with family/ carers",
"Contacting services",
"Appointment arrangements",
"Appointment method",
"Timeliness of care",
"Pain management",
"Diagnosis & triage",
"Referals & continuity of care",
"Length of stay/ duration of care",
"Discharge",
"Care plans",
"Patient records",
"Links with non-NHS organisations",
"Cleanliness, tidiness & infection control",
"Safety & security",
"Provision of medical equipment",
"Service location",
"Transport to/ from services",
Expand All @@ -192,14 +177,21 @@
"Mental Health Act",
"Equality, Diversity & Inclusion",
"Admission",
"Collecting patients feedback",
"Labelling not possible",
"Environment & Facilities",
"Supplying & understanding medication",
"Activities & access to fresh air",
"Food & drink provision & facilities",
"Sensory experience",
"Impact of treatment/ care",
# "Psychological therapy arrangements",
# "Existence of services",
# "Choice of services",
# "Out of hours support (community services)",
# "Learning organisation",
"Interaction with family/ carers",
"Negative experience & dissatisfaction",
"Positive experience & gratitude",
]

sentiment_dict = {
Expand Down
23 changes: 12 additions & 11 deletions pxtextmining/pipelines/multilabel_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,15 +357,16 @@ def run_two_layer_sklearn_pipeline(


if __name__ == "__main__":
run_bert_pipeline(additional_features = True, path = 'test_multilabel/merged_cats_0607', target = merged_minor_cats)
run_sklearn_pipeline(additional_features = True, target= merged_minor_cats, models_to_try = ["xgb"], path = 'test_multilabel/merged_cats_0607/xgb',
include_analysis=True)
run_svc_pipeline(
additional_features=True,
target=merged_minor_cats,
path="test_multilabel/merged_cats_0607/svc",
include_analysis=True
)
run_sklearn_pipeline(additional_features = True, target= merged_minor_cats, models_to_try = ["svm"], path = 'test_multilabel/merged_cats_0607/svc_2',
include_analysis=True)

# run_sklearn_pipeline(additional_features = True, target= minor_cats, models_to_try = ["xgb"], path = 'test_multilabel/v6_230806/xgb',
# include_analysis=True)
# run_svc_pipeline(
# additional_features=True,
# target=minor_cats,
# path="test_multilabel/v6_230806/svc",
# include_analysis=True
# )
run_bert_pipeline(additional_features = True, path = 'test_multilabel/v6_230806', target = minor_cats, include_analysis=True)
# run_sklearn_pipeline(additional_features = True, target= minor_cats, models_to_try = ["svm"], path = 'test_multilabel/v6_230806/svc_2',
# include_analysis=True)
# run_two_layer_sklearn_pipeline()
Loading

0 comments on commit e078e12

Please sign in to comment.