In [None]:
import pandas as pd 

df = pd.read_csv('main_data/new_contradictions_df.csv')

In [None]:
len(df['contraindication_result'].unique())

In [None]:
import pandas as pd
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")

class DRUG_CLASS_CLASSIFICATION():

    def __init__(self):
        self.pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple", device=0) # pass device=0 if using gpu
        df = pd.read_csv('main_data/new_contradictions_df.csv')
        drug_class_list = df['contraindications_description'].unique().tolist()
        # Remove nan values from the list
        drug_class_list = [item for item in drug_class_list if not pd.isna(item)]
        self.all_details = drug_class_list

    def classify_presciption_drugs(self):
        all_results = []
        all_details = self.all_details
        print(f'All Details: {all_details}')    
        # Pass the entire all_details list to the pipeline
        full_results = self.pipe(all_details)
        print(f'Results: {full_results}')
        for sentence_result, detail in zip(full_results, all_details):
            for result in sentence_result:
                if result is not None:
                    print(f'Result: {result}')
                    if len(str(result['word'])) >= 4 and '#' not in str(result['word']):
                        all_results.append({'Entity Group': result['entity_group'], 'Word': str(result['word']), 'Score': result['score'], 'Sentence': detail})
        # Return an empty DataFrame if no results are found
        if not all_results:
            return pd.DataFrame(columns=['Entity Group', 'Word', 'Score', 'Sentence'])
        return pd.DataFrame(all_results)

# Create an instance of the DRUG_CLASS_CLASSIFICATION class
drug_classifier = DRUG_CLASS_CLASSIFICATION()

# Classify the prescription drugs and save results in a pandas DataFrame
results_df = drug_classifier.classify_presciption_drugs()



In [None]:
results_df.to_csv('new_data/djang_base_contradiction_1.csv', index=False)

In [29]:
results_df = pd.read_csv('new_data/djang_base_contradiction_1.csv')

In [30]:
len(results_df['Entity Group'].unique())

29

In [31]:
df=results_df

In [40]:
new_column_names = {
    'generic_name': 'contraindications_description',
}

# Use the rename method to rename the columns
df.rename(columns=new_column_names, inplace=True)

In [41]:
df=df.astype('str').astype(str).apply(lambda x: x.str.lower())

In [42]:
df

Unnamed: 0,group,indication,score,contraindications_description
0,sign_symptom,hypersensitivity,0.99985325,adalimumab should not be administered to patie...
1,medication,toad,0.9997948,adalimumab should not be administered to patie...
2,medication,acyclovircream,0.9951826,acyclovircream is contraindicated in patients ...
3,sign_symptom,hyper,0.8871652,acyclovircream is contraindicated in patients ...
4,medication,aciclov,0.72187907,aciclovir is contraindicated in patients known...
...,...,...,...,...
6636,severity,severe,0.99654263,zafirlukast should not be given to patients wh...
6637,disease_disorder,renal,0.85930777,zafirlukast should not be given to patients wh...
6638,disease_disorder,hepatitis,0.9997025,zafirlukast should not be given to patients wh...
6639,detailed_description,long,0.98967594,zafirlukast should not be given to patients wh...


In [43]:
specific_df = df[(df['group'] == 'history') | (df['group'] == 'disease_disorder') | (df['group'] == 'medication') | (df['group'] == 'subject' ) | (df['group'] == 'family_history') ]


In [45]:
specific_df.reset_index(inplace=True)

In [49]:
specific_df.drop(columns=['index'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  specific_df.drop(columns=['index'], inplace=True)


In [50]:
specific_df

Unnamed: 0,group,indication,score,contraindications_description
0,medication,toad,0.9997948,adalimumab should not be administered to patie...
1,medication,acyclovircream,0.9951826,acyclovircream is contraindicated in patients ...
2,medication,aciclov,0.72187907,aciclovir is contraindicated in patients known...
3,medication,acyclovir,0.9977528,acyclovir is contraindicated in patients known...
4,medication,acyclovir,0.99675465,acyclovir is contraindicated in patients known...
...,...,...,...,...
2743,disease_disorder,hepatitis b,0.99699664,hypersensitivity; abnormally low neutrophil co...
2744,disease_disorder,c virus,0.9962046,hypersensitivity; abnormally low neutrophil co...
2745,medication,zaleplon,0.9051752,zaleplon is contraindicated in patients with k...
2746,disease_disorder,renal,0.85930777,zafirlukast should not be given to patients wh...


In [22]:
full_df= pd.read_csv('new_data/contraindication_result.csv')

In [24]:
selected_columns = ['generic_name', 'contraindications_description','contraindication_result']
full_new_df = full_df[selected_columns]
full_new_df.rename(columns={'side_effects_description': 'side_effects'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_new_df.rename(columns={'side_effects_description': 'side_effects'}, inplace=True)


In [51]:


merged_df = specific_df.merge(full_new_df, on='contraindications_description', how='left')


In [53]:
full_new_df

Unnamed: 0,generic_name,contraindications_description,contraindication_result
0,adalimumab,adalimumab should not be administered to patie...,hypersensitivity toadalimumab or any of its co...
1,acyclovir + hydrocortisone,there is no known contraindication.,there is no known contraindication
2,acyclovir (topical),acyclovircream is contraindicated in patients ...,hypersensitivity
3,acyclovir (oral),aciclovir is contraindicated in patients known...,hypersensitive to aciclovir
4,acyclovir (ophthalmic),acyclovir is contraindicated in patients known...,acyclovir or valacyclovir
...,...,...,...
1703,zinc oxide,known hypersensitivity to any component of the...,known hypersensitivity to any component of the...
1704,zinc orotate,no data available,no data available
1705,zidovudine,hypersensitivity; abnormally low neutrophil co...,concomitant use with interferon alfa
1706,zaleplon,zaleplon is contraindicated in patients with k...,hypersensitivity to any of its components


In [54]:
merged_df.to_csv('new_data/djang_main_contradiction_1.csv', index=False)

In [55]:
merged_df.replace('nan', np.nan, inplace=True)

Unnamed: 0,group,indication,score,contraindications_description,generic_name,contraindication_result
0,medication,toad,0.9997948,adalimumab should not be administered to patie...,adalimumab,hypersensitivity toadalimumab or any of its co...
1,medication,acyclovircream,0.9951826,acyclovircream is contraindicated in patients ...,acyclovir (topical),hypersensitivity
2,medication,aciclov,0.72187907,aciclovir is contraindicated in patients known...,acyclovir (oral),hypersensitive to aciclovir
3,medication,acyclovir,0.9977528,acyclovir is contraindicated in patients known...,acyclovir (ophthalmic),acyclovir or valacyclovir
4,medication,acyclovir,0.99675465,acyclovir is contraindicated in patients known...,acyclovir (ophthalmic),acyclovir or valacyclovir
...,...,...,...,...,...,...
2762,disease_disorder,hepatitis b,0.99699664,hypersensitivity; abnormally low neutrophil co...,zidovudine,concomitant use with interferon alfa
2763,disease_disorder,c virus,0.9962046,hypersensitivity; abnormally low neutrophil co...,zidovudine,concomitant use with interferon alfa
2764,medication,zaleplon,0.9051752,zaleplon is contraindicated in patients with k...,zaleplon,hypersensitivity to any of its components
2765,disease_disorder,renal,0.85930777,zafirlukast should not be given to patients wh...,zafirlukast,patients with a history of moderate or severe ...


In [56]:
merged_df['group'].unique()

array(['medication', 'history', 'disease_disorder', 'family_history',
       'subject'], dtype=object)

In [59]:
word_replacements = {
    'medication': 'contradicted drug',
    'history': 'contradicted patient history',
    'disease_disorder': 'contradicted disease',
    'family_history': 'contradicted family history',
    'subject': 'contradicted subject'
}

# Use the replace() method to replace the words in the 'column_name' column
merged_df['group'] = merged_df['group'].replace(word_replacements, regex=True)


In [61]:
merged_df.to_csv('new_data/djang_main_contradiction_2.csv', index=False)

In [62]:
merged_df

Unnamed: 0,group,indication,score,contraindications_description,generic_name,contraindication_result
0,contradicted drug,toad,0.9997948,adalimumab should not be administered to patie...,adalimumab,hypersensitivity toadalimumab or any of its co...
1,contradicted drug,acyclovircream,0.9951826,acyclovircream is contraindicated in patients ...,acyclovir (topical),hypersensitivity
2,contradicted drug,aciclov,0.72187907,aciclovir is contraindicated in patients known...,acyclovir (oral),hypersensitive to aciclovir
3,contradicted drug,acyclovir,0.9977528,acyclovir is contraindicated in patients known...,acyclovir (ophthalmic),acyclovir or valacyclovir
4,contradicted drug,acyclovir,0.99675465,acyclovir is contraindicated in patients known...,acyclovir (ophthalmic),acyclovir or valacyclovir
...,...,...,...,...,...,...
2762,contradicted disease,hepatitis b,0.99699664,hypersensitivity; abnormally low neutrophil co...,zidovudine,concomitant use with interferon alfa
2763,contradicted disease,c virus,0.9962046,hypersensitivity; abnormally low neutrophil co...,zidovudine,concomitant use with interferon alfa
2764,contradicted drug,zaleplon,0.9051752,zaleplon is contraindicated in patients with k...,zaleplon,hypersensitivity to any of its components
2765,contradicted disease,renal,0.85930777,zafirlukast should not be given to patients wh...,zafirlukast,patients with a history of moderate or severe ...


In [None]:

words_to_check = ['side effect', 'effect', 
                  'adverse reaction','effects','effect',
                  'side effects','hyper','sync']
mask = specific_df['indication'].str.contains(r'\b(?:' + '|'.join(words_to_check) + r')\b', case=False)

# Use the .loc indexer to keep rows that have additional words or the exact words specified
filtered_df = specific_df.loc[~mask]
