# Detection Module

    The main goal of the detection module is to discover, from text, relationships between network nodes/systems and gufo:Entities, using the gazetteers derived from the ontologies that enrich PropaPhen into PropaPhen+.

In [1]:
%load_ext autoreload
%autoreload 2

## Libraries

### Installing

In [2]:
#!pip install pandas
#!pip install tqdm
#!pip install nltk
#!pip install gatenlp
#!pip install py4j
#!pip install pyodide
#!pip install ipywidgets
#!pip install neo4j

### Standard

In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import nltk
import glob

In [4]:
from gatenlp import Document
from gatenlp.gateworker import GateWorker

### Custom libraries

In [5]:
import sys
sys.path.append('lib/')

In [6]:
from detection.relationshipextraction import RelationshipDiscovery, GateExtractor, CleanDicts, rmToRelationCSV
from detection.schema import Term, Concept, df_to_concepts, cleaningPlaceStr, conceptsToGazetteer
from detection.worldumls import umlsConceptCleanner, isEnglish, worldConceptCleanner
from detection.worldumls import ClearnWorldKGGazetteer
#import detection.observationclustering

## Globals

In [103]:
# Input locations (raw text, gazetteer CSVs, generated GATE .lst files).
# NOTE(review): path_to_covid_journals is not referenced anywhere else in the visible notebook — confirm it is still needed.
path_to_covid_journals = "data/textual/covid/newspaper/"
path_to_kb_gazetteer = '../data/gazetteers/kbgazetteer.csv'
# NOTE(review): "netwoork" is a typo for "network"; the name is kept because the Place Gazetteers cell reads it.
path_to_netwoork_gazetteer = '../data/gazetteers/world_gazetteer_en.csv'
# NOTE(review): this path and path_to_covid_journals are rooted at "data/", every other path at "../data/" — confirm both roots are intended.
path_to_lsts = "data/lst/"
# Output locations: relation CSVs and per-source observation CSVs.
path_to_relation_folder = "../data/neo4j/"
path_to_covid_journalobservationcsv = "../data/neo4j/covid_observations_journal.csv"
path_to_covid_medicalobservationcsv = "../data/neo4j/covid_observations_medical.csv"
path_to_covid_socialobservationcsv = "../data/neo4j/covid_observations_social.csv"
path_to_monkeypox_journalobservationcsv = "../data/neo4j/monkeypox_observations_journal.csv"
path_to_monkeypox_medicalobservationcsv = "../data/neo4j/monkeypox_observations_medical.csv"
path_to_monkeypox_socialobservationcsv = "../data/neo4j/monkeypox_observations_social.csv"

## Relationship Discovery

### KB Gazetteers

In [8]:
kb_concept_list = []
network_concept_list = []

In [9]:
df_kb = pd.read_csv(path_to_kb_gazetteer)

In [10]:
df_kb.head()

Unnamed: 0.1,Unnamed: 0,ID,Name
0,0,C0026106,Mild mental retardation
1,1,C0026351,Moderate mental retardation
2,2,C0036857,Severe mental retardation
3,3,C0020796,Profound mental retardation
4,4,C0025362,Unspecified mental retardation


In [11]:
kb_concept_list = df_to_concepts(df_kb)

Finding Terms


12620098it [13:09, 15993.15it/s]


Creating Term list


In [12]:
# Replace every KB concept in the list with its cleaned version.
# NOTE(review): the cleaner is applied twice to each concept, which looks like a
# copy-paste duplicate — confirm umlsConceptCleanner is idempotent before dropping one call.
for i in tqdm(range(len(kb_concept_list))):
    kb_concept_list[i] = umlsConceptCleanner(kb_concept_list[i])
    kb_concept_list[i] = umlsConceptCleanner(kb_concept_list[i])

100%|█████████████████████████████████████████████████| 7892473/7892473 [00:35<00:00, 220861.90it/s]


In [13]:
umlsdict = conceptsToGazetteer(kb_concept_list,path_to_lsts+"umls.lst",cleaningPlaceStr)

100%|██████████████████████████████████████████████████| 7892473/7892473 [02:19<00:00, 56774.96it/s]


### Place Gazetteers

In [47]:
df_network = pd.read_csv(path_to_netwoork_gazetteer)

In [48]:
washingtonRemoveDoubles = ('wkg:158368533', "Washington")
bradFord = ("wkg:26701367","Bradford")
def removeDoublesInNet(df_network,tupleList):
    """Drop gazetteer concepts that clash with a chosen canonical place.

    For every ``(keep_id, name, ...)`` entry of ``tupleList``, each row whose ``Name``
    contains ``name`` as a (case-sensitive) substring while its ``ID`` differs from
    ``keep_id`` marks its ``ID`` for removal. Every row carrying a marked ``ID`` is then
    dropped, including rows of that concept stored under another name.

    Parameters
    ----------
    df_network : pandas.DataFrame
        Gazetteer with at least the columns ``ID`` and ``Name``.
    tupleList : iterable of tuple
        Entries whose first item is the ID to keep and whose second item is the
        name fragment identifying its homonyms.

    Returns
    -------
    pandas.DataFrame
        A new frame without the clashing concepts; ``df_network`` is not modified
        and the surviving rows keep their original index labels.
    """
    names = df_network['Name']
    ids = df_network['ID']
    ids_to_remove = set()
    for entry in tupleList:
        keep_id, name = entry[0], entry[1]
        # Rows without a usable string name (e.g. NaN) can never match a name fragment.
        has_name = names.map(lambda value: isinstance(value, str) and name in value).astype(bool)
        ids_to_remove.update(ids[has_name & (ids != keep_id)])
    # Filter with a mask rather than dropping index labels, so that a non-unique index
    # cannot remove unrelated rows.
    return df_network[~ids.isin(list(ids_to_remove))]

In [49]:
df_network = removeDoublesInNet(df_network, [washingtonRemoveDoubles,bradFord])

In [50]:
clear_net_list = ['"Nga"', '"Centre"', '"Kou"', '"San"','"Real"',
                 '"Vincent"', '"Lille"','"North"', '"Barr"', '"North"'
                 ,'"South"','"West"','"East"','"Brito"', '"Utrecht"', '"Bush"',
                 '"Bush"', '"Republic"','"Union"', '"Time"',
                 '"Institute"','"Carbon"','"Center"','"Delhi"','"Mendenhall"']

In [51]:
df_network = ClearnWorldKGGazetteer(df_network,clear_net_list)

In [52]:
df_network.head()

Unnamed: 0.1,Unnamed: 0,ID,Name
0,0,wkg:10,"""Mamassita"""
1,1,wkg:10,"""Mamacita"""
2,2,wkg:1000709658,"""Boulzazen"""
3,3,wkg:1000709658,"""Boulzazen"""
4,4,wkg:1000709660,"""Tizi El Oued"""


In [53]:
network_concept_list = df_to_concepts(df_network)

Finding Terms


1692247it [02:02, 13792.87it/s]


Creating Term list


In [54]:
# Pre-processing network
#for i in tqdm(range(len(network_concept_list))):
#    network_concept_list[i] = worldConceptCleanner(network_concept_list[i])

In [55]:
# Normal
print("Usual name")
normalplacesdict = conceptsToGazetteer(network_concept_list,path_to_lsts+"places.lst",cleaningPlaceStr)
# Cap
#print("Cap name")
#capdict = conceptsToGazetteer(network_concept_list,path_to_lsts+"places_cap.lst",capPlaceStr)
# Lower
#print("Lower name")
#lowerdict = conceptsToGazetteer(network_concept_list,path_to_lsts+"places_lower.lst",lowerPlaceStr)

Usual name


100%|███████████████████████████████████████████████████| 948962/948962 [00:02<00:00, 357207.48it/s]


### GATE

In [64]:
# Handle to the GATE worker used by every extraction cell below.
# start=False is passed, so presumably an already running worker is expected — TODO confirm.
# NOTE(review): the auth token is hard-coded in the notebook.
gs = GateWorker(start=False, auth_token="1234")

In [56]:
from nltk.corpus import stopwords
import string

def cleanKeys(dictionary, clean_list):
    """Remove every key listed in ``clean_list`` from ``dictionary``.

    The dictionary is modified in place and also returned; entries of
    ``clean_list`` that are not keys of the dictionary are ignored.
    """
    for unwanted in clean_list:
        dictionary.pop(unwanted, None)
    return dictionary

def CleanDicts(netdict,kbdict):
    """Strip noisy surface forms from the network and KB gazetteer dictionaries.

    Removed keys are: English stopwords (as listed by NLTK and in title case),
    punctuation characters, single ASCII letters and English month names
    (capitalised and lower case). Every key still present in ``netdict`` is
    additionally removed from ``kbdict``.

    Both dictionaries are modified in place and returned as ``(netdict, kbdict)``.
    Note that this definition rebinds the ``CleanDicts`` name imported from
    ``detection.relationshipextraction`` earlier in the notebook.
    """
    nltk.download('stopwords')
    english_stopwords = stopwords.words('english')
    month_names = ["January", "February", "March", "April", "May",
                   "June", "July", "August", "September", "October", "November", "December"]
    blocked_terms = (
        english_stopwords
        + list(string.punctuation)
        + list(string.ascii_lowercase)
        + list(string.ascii_uppercase)
        + [word.title() for word in english_stopwords]
        + month_names
        + [month.lower() for month in month_names]
    )
    netdict = cleanKeys(netdict, blocked_terms)
    # Keys kept by the network dictionary take precedence over the KB dictionary.
    kbdict = cleanKeys(kbdict, blocked_terms + list(netdict.keys()))
    return netdict, kbdict

In [57]:
normalplacesdict, umlsdict = CleanDicts(normalplacesdict, umlsdict)

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/gabriel/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [60]:
gateExtractor = GateExtractor(umlsdict,normalplacesdict)

Creating KB gazetteer...


100%|███████████████████████████████████████████████████| 7657560/7657560 [17:09<00:00, 7436.93it/s]


Creating Network gazetteer...


100%|██████████████████████████████████████████████████| 1217356/1217356 [01:11<00:00, 16960.45it/s]


Creating Merging gazetteer...


In [65]:
# Annie
gs.worker.loadMavenPlugin("uk.ac.gate.plugins", "annie", "8.6")
# now load the prepared ANNIE pipeline from the plugin
pipeline = gs.worker.loadPipelineFromPlugin("uk.ac.gate.plugins","annie", "/resources/ANNIE_with_defaults.gapp")
pipeline.getName()

'ANNIE'

In [66]:
gateExtractor.extra_pr['annie'] = pipeline

In [67]:
from detection.relationshipextraction import RelationMatrix

class RMGenerator():
    """Build relation matrices linking KB terms to network terms found in the same text span.

    Every public ``*TermMatching*`` method walks ``corpus``, annotates each document
    with the gazetteers of ``gateExtractor`` and, for every ``kb`` annotation that
    shares a span with a ``network`` annotation, calls ``RelationMatrix.increaseBy``
    with the two concepts and an increment of 1. The span is the whole document
    (direct), a ``paragraph`` annotation of the ``Original markups`` set, or a
    ``Sentence`` annotation of the default set.

    The "Transitivity" variants additionally link a network term to the KB terms
    that co-occur (in any span of the same document) with a KB term of its span,
    but only where ``RelationMatrix.getValue`` returns ``None`` for that pair.

    Each document is released with ``gs.del_resource`` once processed, also when
    processing it raises.

    Parameters
    ----------
    corpus : iterable of GATE documents.
    gateExtractor : object exposing ``tokenizer``, ``tok_gaz``, ``extra_pr``,
        ``dict_kb`` and ``dict_network``.
    gs : GATE worker handle exposing ``gdoc2pdoc``, ``del_resource`` and ``worker``.
    """

    def __init__(self, corpus,gateExtractor, gs):
        self.corpus = corpus
        self.gateExtractor = gateExtractor
        self.gs = gs

    def _typed_annotations(self, pdoc, ann_type, scope=None):
        """Return the default-set annotations of ``ann_type``, limited to ``scope`` when given."""
        annotations = pdoc.annset()
        if scope is not None:
            annotations = annotations.within(scope)
        return annotations.with_type(ann_type)

    def _link_cooccurrences(self, rm, pdoc, scope=None):
        """Add one link per (kb, network) annotation pair found in ``scope`` (whole document if None)."""
        # The network annotations do not depend on the KB annotation: fetch them once.
        network_annotations = list(self._typed_annotations(pdoc, 'network', scope))
        for kb_annotation in self._typed_annotations(pdoc, 'kb', scope):
            for network_annotation in network_annotations:
                rm.increaseBy(self.gateExtractor.dict_kb[kb_annotation.features['key']],
                              self.gateExtractor.dict_network[network_annotation.features['key']], 1)

    def _kb_neighbours(self, pdoc, scopes):
        """Map each KB key to the keys of the other KB annotations sharing one of ``scopes`` with it."""
        neighbours = {}
        for scope in scopes:
            kb_annotations = list(self._typed_annotations(pdoc, 'kb', scope))
            for first in kb_annotations:
                for second in kb_annotations:
                    # An annotation is not its own neighbour.
                    if first == second:
                        continue
                    neighbours.setdefault(first.features['key'], []).append(second.features['key'])
        return neighbours

    def _link_transitive(self, rm, pdoc, scopes, neighbours):
        """Link each network term to the neighbours of the KB terms of its scope, if not linked yet."""
        for scope in scopes:
            kb_annotations = list(self._typed_annotations(pdoc, 'kb', scope))
            for network_annotation in self._typed_annotations(pdoc, 'network', scope):
                for kb_annotation in kb_annotations:
                    for neighbour_key in neighbours.get(kb_annotation.features['key'], ()):
                        kb_concept = self.gateExtractor.dict_kb[neighbour_key]
                        network_concept = self.gateExtractor.dict_network[
                            network_annotation.features['key']]
                        # Only create the link when the matrix holds no value for the pair.
                        if rm.getValue(kb_concept, network_concept) is None:
                            rm.increaseBy(kb_concept, network_concept, 1)

    def _scoped_matching(self, matrix_id, set_name, scope_type, transitive=False):
        """Shared pipeline of the paragraph/sentence matchers.

        ``set_name`` and ``scope_type`` select the annotations delimiting the spans;
        ``transitive`` enables the extra KB-to-KB propagation step.
        """
        assert 'annie' in self.gateExtractor.extra_pr.keys()
        rm = RelationMatrix(matrix_id)
        # Per document
        for doc in tqdm(self.corpus):
            try:
                # Documents without text are skipped (and still released below).
                if len(self.gs.gdoc2pdoc(doc).text) <= 0:
                    continue
                # Run annie
                self.gs.worker.run4Document(self.gateExtractor.extra_pr['annie'], doc)
                pdoc = self.gs.gdoc2pdoc(doc)
                # Get network and kb
                pdoc = self.gateExtractor.tok_gaz(pdoc)
                scopes = pdoc.annset(set_name).with_type(scope_type)
                neighbours = self._kb_neighbours(pdoc, scopes) if transitive else None
                # Making the rm links
                for scope in scopes:
                    self._link_cooccurrences(rm, pdoc, scope)
                # Adding transitivity relations
                if transitive:
                    self._link_transitive(rm, pdoc, scopes, neighbours)
            finally:
                # Always release the document held by the GATE worker.
                self.gs.del_resource(doc)
        return rm

    def directTermMatching(self, matrix_id):
        """Link every KB term to every network term of the same document."""
        rm = RelationMatrix(matrix_id)
        # Per document
        for doc in tqdm(self.corpus):
            try:
                pdoc = self.gs.gdoc2pdoc(doc)
                pdoc = self.gateExtractor.tokenizer(pdoc)
                pdoc = self.gateExtractor.tok_gaz(pdoc)
                # Making the rm links
                self._link_cooccurrences(rm, pdoc)
            finally:
                self.gs.del_resource(doc)
        return rm

    def paragraphTermMatching(self, matrix_id):
        """Link KB and network terms that share a ``paragraph`` annotation."""
        return self._scoped_matching(matrix_id, 'Original markups', 'paragraph')

    def paragraphTermMatchingTransitivity(self, matrix_id):
        """Paragraph matching followed by the transitive KB-to-KB propagation."""
        return self._scoped_matching(matrix_id, 'Original markups', 'paragraph', transitive=True)

    def sentenceTermMatching(self, matrix_id):
        """Link KB and network terms that share a ``Sentence`` annotation."""
        return self._scoped_matching(matrix_id, '', 'Sentence')

    def sentenceTermMatchingTransitivity(self, matrix_id):
        """Sentence matching followed by the transitive KB-to-KB propagation."""
        return self._scoped_matching(matrix_id, '', 'Sentence', transitive=True)

## Relationship Discovery - COVID

In [106]:
# COVID / journal corpus: run rd.rmGen.sentenceTermMatching over it and export the
# matrix (source label 'Journal_COVID', cluster date 2019-11) as <matrix_id>.csv
# in path_to_relation_folder.
corpusJournal = gs.getCorpus4Name('PreDiViD-Journal-11-19')
rd = RelationshipDiscovery(corpusJournal, gateExtractor,gs)
rmSentence = rd.rmGen.sentenceTermMatching('PreDiViD-Journal-2019-11-Sentence')
df_rmSentence = rmToRelationCSV(rmSentence, 'Journal_COVID', 1, 'hasPresence',cluster_date='2019-11') 
df_rmSentence.to_csv(path_to_relation_folder+rmSentence.matrix_id+".csv", index=False)

100%|█████████████████████████████████████████████████████████████████| 8/8 [00:04<00:00,  1.95it/s]


In [70]:
# COVID / medical corpus: run rd.rmGen.sentenceTermMatching over it and export the
# matrix (source label 'Medical_COVID', cluster date 2019-12) as <matrix_id>.csv
# in path_to_relation_folder.
corpusMedical = gs.getCorpus4Name('PreDiViD-Medical-12-19')
rd = RelationshipDiscovery(corpusMedical, gateExtractor,gs)
rmSentence = rd.rmGen.sentenceTermMatching('PreDiViD-Medical-2019-12-Sentence')
df_rmSentence = rmToRelationCSV(rmSentence, 'Medical_COVID', 1, 'hasPresence',cluster_date='2019-12') 
df_rmSentence.to_csv(path_to_relation_folder+rmSentence.matrix_id+".csv", index=False)

100%|█████████████████████████████████████████████████████████████| 726/726 [01:05<00:00, 11.02it/s]


In [71]:
# COVID / social corpus: run rd.rmGen.sentenceTermMatching over it and export the
# matrix (source label 'Social_COVID', cluster date 2020-02) as <matrix_id>.csv
# in path_to_relation_folder.
# NOTE(review): the matrix id below says '2020-20' while the corpus name and cluster_date
# point to February 2020; probably a typo for '2020-02'. It is left unchanged because the id
# is also the output file name — confirm nothing downstream expects the current name.
corpusSocial = gs.getCorpus4Name('PreDiViD-Social-2-20') # CORRECT corpus dates
rd = RelationshipDiscovery(corpusSocial, gateExtractor,gs)
rmSentence = rd.rmGen.sentenceTermMatching('PreDiViD-Social-2020-20-Sentence')
df_rmSentence = rmToRelationCSV(rmSentence, 'Social_COVID', 1, 'hasPresence',cluster_date='2020-02') 
df_rmSentence.to_csv(path_to_relation_folder+rmSentence.matrix_id+".csv", index=False)

100%|███████████████████████████████████████████████████████████| 4908/4908 [04:41<00:00, 17.44it/s]


## Relationship Discovery - Monkeypox

In [72]:
# Monkeypox / journal corpus: run rd.rmGen.sentenceTermMatching over it and export the
# matrix (source label 'Journal_Monkeypox', cluster date 2022-05) as <matrix_id>.csv
# in path_to_relation_folder.
corpusJournal = gs.getCorpus4Name('PreDiViD-Monkeypox-journal-2022-5')
rd = RelationshipDiscovery(corpusJournal, gateExtractor,gs)
rmSentence = rd.rmGen.sentenceTermMatching('PreDiViD-Monkeypox-journal-2022-5-Sentence')
df_rmSentence = rmToRelationCSV(rmSentence, 'Journal_Monkeypox', 1, 'hasPresence',cluster_date='2022-05') 
df_rmSentence.to_csv(path_to_relation_folder+rmSentence.matrix_id+".csv", index=False)

100%|███████████████████████████████████████████████████████████████| 27/27 [00:03<00:00,  7.09it/s]


In [73]:
# Monkeypox / medical corpus: run rd.rmGen.sentenceTermMatching over it and export the
# matrix (source label 'Medical_Monkeypox', cluster date 2022-06) as <matrix_id>.csv
# in path_to_relation_folder.
# NOTE(review): the corpus is named '...-pubmed-...' while the matrix id says '...-medical-...'.
corpusMedical = gs.getCorpus4Name('PreDiViD-Monkeypox-pubmed-2022-6')
rd = RelationshipDiscovery(corpusMedical, gateExtractor,gs)
rmSentence = rd.rmGen.sentenceTermMatching('PreDiViD-Monkeypox-medical-2022-6-Sentence')
df_rmSentence = rmToRelationCSV(rmSentence, 'Medical_Monkeypox', 1, 'hasPresence',cluster_date='2022-06') 
df_rmSentence.to_csv(path_to_relation_folder+rmSentence.matrix_id+".csv", index=False)

100%|███████████████████████████████████████████████████████████████| 36/36 [00:02<00:00, 13.80it/s]


In [74]:
# Monkeypox / social corpus: run rd.rmGen.sentenceTermMatching over it and export the
# matrix (source label 'Social_Monkeypox', cluster date 2022-05) as <matrix_id>.csv
# in path_to_relation_folder.
corpusSocial = gs.getCorpus4Name('PreDiViD-Monkeypox-Social-2022-5') # CORRECT corpus dates
rd = RelationshipDiscovery(corpusSocial, gateExtractor,gs)
rmSentence = rd.rmGen.sentenceTermMatching('PreDiViD-Monkeypox-Social-2022-5-Sentence')
df_rmSentence = rmToRelationCSV(rmSentence, 'Social_Monkeypox', 1, 'hasPresence',cluster_date='2022-05') 
df_rmSentence.to_csv(path_to_relation_folder+rmSentence.matrix_id+".csv", index=False)

100%|█████████████████████████████████████████████████████████| 33826/33826 [18:45<00:00, 30.05it/s]


## Transitivity

In [None]:
corpus = gs.getCorpus4Name('PreDiViD')
rmGen = RMGenerator(corpus, gateExtractor, gs)
rd = RelationshipDiscovery(corpus, gateExtractor,gs,rmGen)

In [None]:
rmSentence = rd.rmGen.sentenceTermMatchingTransitivity('PreDiViD-COVID-Journal-2019-11-Sentence-Transitivity')

In [None]:
# The matrix id announces a transitive paragraph matrix, so the transitive matcher is
# used; calling the plain paragraphTermMatching here produced a non-transitive matrix
# stored under a "-Transitivity" name.
rmParagraph = rd.rmGen.paragraphTermMatchingTransitivity('PreDiViD-COVID-Journal-2019-11-Paragraph-Transitivity')

In [None]:
# Export the paragraph-level matrix as a relation CSV named after its matrix id.
# NOTE(review): the source label here is 'Journal', whereas the COVID cells and the
# observation queries of this notebook use 'Journal_COVID' — confirm which label is intended.
df_rmParagraph = rmToRelationCSV(rmParagraph, 'Journal', 1, 'hasPresence',cluster_date='2019-11') 
df_rmParagraph.to_csv(path_to_relation_folder+rmParagraph.matrix_id+".csv", index=False)

In [None]:
# Export the sentence-level matrix as a relation CSV named after its matrix id.
# NOTE(review): the source label here is 'Journal', whereas the COVID cells and the
# observation queries of this notebook use 'Journal_COVID' — confirm which label is intended.
df_rmSentence = rmToRelationCSV(rmSentence, 'Journal', 1, 'hasPresence',cluster_date='2019-11') 
df_rmSentence.to_csv(path_to_relation_folder+rmSentence.matrix_id+".csv", index=False)

### Observation Mining

In [75]:
from lib.kgce.schema.semantic.neo4jclasses import Neo4jRelation
from lib.kgce.neo4j.handler import Neo4jWrapper

In [76]:
from neo4j import GraphDatabase
from tqdm import tqdm


class Neo4jWrapper:
    """Small helper that opens a Neo4j driver and runs batches of Cypher queries.

    Instances can be used as context managers (``with Neo4jWrapper(...) as w:``),
    in which case the driver is closed on exit; otherwise call ``closeConnection``.

    Note that this definition rebinds the ``Neo4jWrapper`` name imported from
    ``lib.kgce.neo4j.handler`` in the previous cell.

    Parameters
    ----------
    uri : connection URI handed to ``GraphDatabase.driver``.
    userName, password : credentials handed to the driver as the ``auth`` pair.
    """

    def __init__(self, uri, userName, password):
        self.uri = uri
        self.userName = userName
        self.password = password
        # Connect to the neo4j database server
        self.graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.closeConnection()
        # Never swallow an exception raised inside the ``with`` body.
        return False

    def sendQuery(self, cql_commands):
        """Run every query of ``cql_commands`` in one session.

        Returns a list with one entry per query, in order: the query result
        converted with ``to_df()`` on success, or the error message (``str``)
        when running the query raised. Failures are also reported through
        ``tqdm.write`` (message first, then the offending query) and do not stop
        the remaining queries.
        """
        result = []
        with self.graphDB_Driver.session() as graphDB_Session:
            for cqlCreate in tqdm(cql_commands):
                try:
                    result.append(graphDB_Session.run(cqlCreate).to_df())
                except Exception as e:
                    # Best effort: log, keep the message in place of the result, carry on.
                    tqdm.write(str(e))
                    tqdm.write(cqlCreate)
                    result.append(str(e))
        return result

    def closeConnection(self):
        """Close the underlying driver."""
        self.graphDB_Driver.close()

In [77]:
import os

# Connection settings are read from the environment when available, so credentials
# do not have to live in the notebook; the fallbacks are the values previously
# hard-coded here.
neowrapper = Neo4jWrapper(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    userName=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "test"),
)

In [84]:
def GetObservationFromSource(neowrapper,source, filterValue):
    """Fetch the ``hasPresence`` observations of one source, grouped per country.

    Parameters
    ----------
    neowrapper : object with a ``sendQuery(list_of_queries)`` method returning one
        entry per query (see the notebook's ``Neo4jWrapper``).
    source : value the relation's ``source`` property must equal. It is inserted
        verbatim between double quotes in the Cypher text, so it must not contain
        double quotes.
    filterValue : minimum ``intensity`` (compared after ``toInteger``) a relation
        must have to be returned.

    Returns
    -------
    pandas.DataFrame
        One row per (``System_Name``, ``n.id``) pair; the remaining columns
        (``c.name``, ``c.id``, ``intensity``) hold the list of values of the group.

    Raises
    ------
    RuntimeError
        If the wrapper reports the query as failed, i.e. returns an error message
        string instead of a result frame.
    """
    strQuery = """MATCH (n:Country)<-[r:hasPresence]-(c) 
        WHERE toInteger(r.intensity) >= {0} AND r.source = "{1}"
        RETURN n.wkgs_nameEn as System_Name, n.id, c.name, c.id, r.intensity as intensity;""".format(
        filterValue, source)
    outcome = neowrapper.sendQuery([strQuery])[0]
    # The notebook's wrapper stores the error text in place of the result on failure;
    # surface it rather than failing later on a missing ``groupby`` attribute.
    if isinstance(outcome, str):
        raise RuntimeError(
            "Observation query for source {0!r} failed: {1}".format(source, outcome))
    return outcome.groupby(['System_Name','n.id'],as_index=False).agg(list)

In [101]:
# Collect the per-country observations of every source; the last argument is the
# minimum relation intensity kept by the query.
# NOTE(review): the thresholds (5/8/10 for COVID, 5/3/10 for Monkeypox) are unexplained
# magic numbers — consider naming them in the Globals section.
df_observation_journal_covid = GetObservationFromSource(neowrapper,"Journal_COVID",5)
df_observation_medical_covid = GetObservationFromSource(neowrapper,"Medical_COVID",8)
df_observation_social_covid = GetObservationFromSource(neowrapper,"Social_COVID",10)
df_observation_journal_monkey = GetObservationFromSource(neowrapper,"Journal_Monkeypox",5)
df_observation_medical_monkey = GetObservationFromSource(neowrapper,"Medical_Monkeypox",3)
df_observation_social_monkey = GetObservationFromSource(neowrapper,"Social_Monkeypox",10)

100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.96it/s]
100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.97it/s]
100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.67it/s]
100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.82it/s]
100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.31it/s]
100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.74it/s]


In [104]:
# Saving COVID
df_observation_journal_covid.to_csv(path_to_covid_journalobservationcsv)
df_observation_medical_covid.to_csv(path_to_covid_medicalobservationcsv)
df_observation_social_covid.to_csv(path_to_covid_socialobservationcsv)
# Saving Monkeypox
df_observation_journal_monkey.to_csv(path_to_monkeypox_journalobservationcsv)
df_observation_medical_monkey.to_csv(path_to_monkeypox_medicalobservationcsv)
df_observation_social_monkey.to_csv(path_to_monkeypox_socialobservationcsv)