In [1]:
from sqlalchemy import create_engine
from sqlalchemy import exc
from sqlalchemy.engine import Engine
import pandas as pd
import cx_Oracle

## Sentiment Analysis

- Python Library : vaderSentiment (Valence Aware Dictionary and Sentiment Reasoner)
- Implementation workflow : SQL -> CSV -> Python -> CSV -> Power Query Engine -> PowerBI 

In [None]:
try:
    connection: Engine = create_engine(
        f"oracle://user:password(DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=host_url)(PORT=port_number)))(CONNECT_DATA=(SERVICE_NAME=service_url)))")
except exc.SQLAlchemyError as e:
    print('SQLAlchemy database connection dropped from create_engine. {}'.format(e))
    raise

sql = """
    select
    t.OR_CASE_ID,
    t.PAT_ID,
    t.PREOP_PPO_RSP_CSN_ID,
    t.PREOP_CONTACT_DATE,
    t.PREOP_PAIN_RELIEF,
    t.PREOP_SURGERY_OUTCOME,
    t.M3_PPO_RSP_CSN_ID,
    t.M3_CONTACT_DATE,
    t.M3_PAIN_RELIEF,
    t.M3_SURGERY_OUTCOME,
    t.M3_IMP_OUTCOME,
    t.Y1_PPO_RSP_CSN_ID,
    t.Y1_CONTACT_DATE,
    t.Y1_POSTOP_CARE,
    t.Y1_SURGERY_OUTCOME,
    t.Y1_IMP_OUTCOME
    from CDPD.PROE_ELIG_RSP_CALCS_MV t
    where t.ppo_preop_m3_y1 like '%1%'
"""

df = pd.read_sql(sql=sql, con=connection)
df.to_csv('epic_patient_outcomes.csv')

In [17]:
from dataclasses import dataclass, field, fields
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

@dataclass
class PatientResponseRatings:
    outcome_response: list = field(default_factory=lambda : [])
    total_sentiment_data: list = field(default_factory=lambda : [])
    negative_rating: list = field(default_factory=lambda : [])
    neutral_rating: list = field(default_factory=lambda : [])
    positive_rating: list = field(default_factory=lambda : [])
    overall_rating: list = field(default_factory=lambda : [])
    patient_id: list = field(default_factory=lambda : [])
    or_case_id: list = field(default_factory=lambda : [])

class PatientResponses:
    SID_OBJ = SentimentIntensityAnalyzer()
    PATIENT_DF = pd.read_csv('epic_patient_outcomes.csv')

    def __init__(self, column_name: str=None, data_class: object=None) -> None:
        self.column_name = column_name
        self.data_class = data_class
        self.__output_df = None

    def sentiment_analysis(self) -> None:
        sent_df = pd.DataFrame()
        column_ = self.column_name

        sent_df[column_] = __class__.PATIENT_DF[column_]
        sent_df[column_] = __class__.PATIENT_DF[column_]
        sent_df['pat_id'] = __class__.PATIENT_DF['pat_id']
        sent_df['or_case_id'] = __class__.PATIENT_DF['or_case_id']
 
        for row in sent_df.index:
            sentiment_dict = __class__.SID_OBJ.polarity_scores(str(sent_df[column_][row]))

            if sentiment_dict['compound'] >= 0.05 :
                end = "Positive"
            elif sentiment_dict['compound'] <= - 0.05 :
                end = "Negative"
            else :
                end = "Neutral" 

            self.data_class.outcome_response.append(sent_df[column_][row])
            self.data_class.total_sentiment_data.append(sentiment_dict)
            self.data_class.negative_rating.append(f"{round(sentiment_dict['neg']*100, 2)}%")
            self.data_class.neutral_rating.append(f"{round(sentiment_dict['neu']*100, 2)}%")
            self.data_class.positive_rating.append(f"{round(sentiment_dict['pos']*100, 2)}%")
            self.data_class.overall_rating.append(f"{end}")  
            self.data_class.patient_id.append(sent_df['pat_id'][row])
            self.data_class.or_case_id.append(sent_df['or_case_id'][row])
      
    def create_dataframe(self) -> None:
        dict_ = {i.name: getattr(self.data_class, i.name) for i in fields(self.data_class)} 
        self.__output_df = pd.DataFrame(dict_)

    def create_csv_file(self, filename: str=None) -> None:
        self.__output_df.to_csv(f'{filename}.csv')

    def return_dataframe(self):
        return self.__output_df

In [19]:
"""Preop Responses"""
preop = PatientResponseRatings()
preop_ = PatientResponses('preop_surgery_outcome', preop)
preop_.sentiment_analysis()
preop_.create_dataframe()
preop_df = preop_.return_dataframe()
# preop_.create_csv_file('_patient_preop_response_ratings')

"""Month Three Responses"""
m3_imp = PatientResponseRatings()
m3 = PatientResponses('m3_imp_outcome', m3_imp)
m3.sentiment_analysis()
m3.create_dataframe()
m3_df = preop_.return_dataframe()
# m3.create_csv_file('_patient_m3_imp_response_ratings')

"""Year One Responses"""
y1_imp = PatientResponseRatings()
y1 = PatientResponses('y1_imp_outcome', y1_imp)
y1.sentiment_analysis()
y1.create_dataframe()
y1_df = preop_.return_dataframe()
# y1.create_csv_file('_patient_y1_imp_response_ratings')

all_responses_df = pd.concat((preop_df, m3_df, y1_df), axis=0, ignore_index=True)
all_responses_df.to_csv('all_patient_responses.csv')

print(all_responses_df)

                                        outcome_response  \
0                                       BETTER MOBILITY    
1        HIKIE DANCE YOGA STAIRS STAND KNEEL RIDE HORSE    
2      GETTING MY LIFE BACK- TRAVELING, HIKING, DANCI...   
3                                  FLEXION AND MOBILITY    
4        ABILITY TO WALK LONGER DISTANCES WHEN TRAVELING   
...                                                  ...   
19963                                  BETTER FUNCTION.    
19964    BEING ABLE TO WALK, GO UP & DOWN STAIRS,  SQUAT   
19965     NORMAL AVERAGE EVERY DAY ACTIVITIES FOR MY AGE   
19966                                           MOBILITY   
19967                                 WALKING AND STAIRS   

                                    total_sentiment_data negative_rating  \
0      {'neg': 0.0, 'neu': 0.256, 'pos': 0.744, 'comp...            0.0%   
1      {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...            0.0%   
2      {'neg': 0.0, 'neu': 0.916, 'pos': 0.084, 'co

## Machine Learning 
- Classifying Patient Responses (Orthopedic Knee Surgeries)
- Python libraries : scikit-learn & spaCy

In [10]:
from sklearn import svm
import spacy

class Category:
    PAIN_MANAGEMENT = "PAIN_MANAGEMENT"
    IMPROVED_FUNCTION = "IMPROVED_FUNCTION"
    QUALITY_OF_LIFE = "QUALITY_OF_LIFE"
    ADVERSE_EVENTS = "ADVERSE_EVENTS"
 
class PatientResponseModel(Category):
    TRAIN_X = [
        "Improve mobility, motion and strength", 
        "Moving without the fear of falling", 
        "Walking pain free", 
        "Better life expectations"
        ]

    NLP = spacy.load("en_core_web_md")
    
    def __init__(self, patient_response: str = None) -> None:
        super().__init__()
        self.patient_response = patient_response
        self._test_x_word_vectors = None
        self._train_x_word_vectors = None
        self.prediction = None
        self.train_y = [
            self.IMPROVED_FUNCTION, 
            self.ADVERSE_EVENTS, 
            self.PAIN_MANAGEMENT, 
            self.QUALITY_OF_LIFE
            ]

    def vectorization(self):
        test_x = [self.patient_response]
        test_docs = [__class__.NLP(text) for text in test_x]
        self.test_x_word_vectors = [x.vector for x in test_docs]

    def predict_classification_category(self):
        clf_svm = svm.SVC(kernel='linear')
        clf_svm.fit(self.train_x_word_vectors, self.train_y)
        self.prediction = clf_svm.predict(self.test_x_word_vectors)
        
    def model_training(self):
        docs = [__class__.NLP(text.upper()) for text in __class__.TRAIN_X]
        self.train_x_word_vectors = [x.vector for x in docs]

    def __repr__(self) -> str:
        return f"{self.prediction}"
    

In [11]:
p = PatientResponseModel("ABILITY TO WORK, EVEN WITH SOME PAIN")
p.vectorization()
p.model_training()
p.predict_classification_category()
print(p)

['IMPROVED_FUNCTION']


In [12]:
p = PatientResponseModel("TO ACHIEVE VERY LITTLE STIFFNESS AFTER SITTING")
p.vectorization()
p.model_training()
p.predict_classification_category()
print(p)


['QUALITY_OF_LIFE']


In [13]:
p = PatientResponseModel("BEING ABLE TO WALK ON MY OWN WITH NO HELP")
p.vectorization()
p.model_training()
p.predict_classification_category()
print(p)

['QUALITY_OF_LIFE']


In [14]:
p = PatientResponseModel("KNEEL ON MY KNEES W/O PAIN, AND DO STAIRS PAINFREE")
p.vectorization()
p.model_training()
p.predict_classification_category()
print(p)

['IMPROVED_FUNCTION']


In [15]:
p = PatientResponseModel("OVERCOME SURGICAL PAIN")
p.vectorization()
p.model_training()
p.predict_classification_category()
print(p)

['ADVERSE_EVENTS']


In [16]:
p = PatientResponseModel("LESS PAIN/STIFFNESS DURING NIGHT!")
p.vectorization()
p.model_training()
p.predict_classification_category()
print(p)

['PAIN_MANAGEMENT']
