In [None]:
import ipywidgets as widgets
import pandas as pd
from IPython.display import clear_output, display, HTML
import re
import csv
from datetime import datetime

# Load the first 100 rows of the notes CSV into a DataFrame. The path is
# relative, so the file must sit in the kernel's working directory.
# NOTE(review): this was labelled a "mock dataset" — confirm the file holds no
# real patient data before sharing this notebook's outputs.
data = pd.read_csv("bleed_lung_cancer_notes.csv", nrows=100)



In [3]:
# Print the kernel's current working directory; the relative paths used in this
# notebook (the CSV file and the SQLite database) resolve against it.
!pwd

/Users/rsingh/Programming/datarequests/lung_cancer_bleeding


In [2]:
from sqlalchemy import create_engine, Column, Integer, String, Text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from scispacy.lemmatizer import SpacyLemmatizer
from scispacy.negation import Negex
from scispacy.language import ScispaCy
import re

# Setting up database
# SQLite database stored in the file annotations.db; the path is relative, so
# the file lives in the current working directory.
engine = create_engine('sqlite:///annotations.db')
# Session factory bound to that engine; calling Session() opens a new session.
Session = sessionmaker(bind=engine)

# Declarative base class that the ORM models below inherit from.
Base = declarative_base()

class Note(Base):
    """ORM model for one row of the ``notes`` table."""
    __tablename__ = 'notes'

    # Primary key; its value is copied into Annotation.note_id when hits are stored.
    id = Column(Integer, primary_key=True)
    # Note body; this is the string handed to the NLP pipeline.
    text = Column(Text)

class Annotation(Base):
    """ORM model for one row of the ``annotations`` table: one regex hit in a note."""
    __tablename__ = 'annotations'

    id = Column(Integer, primary_key=True)
    # id of the Note the hit came from (plain integer; no ForeignKey is declared).
    note_id = Column(Integer)
    # Values of match.start() / match.end() for the hit.
    # NOTE(review): the loop that fills this table searches the re-parsed
    # lemmatized string, so these look like offsets into that string rather
    # than into Note.text — confirm this is intended.
    start = Column(Integer)
    end = Column(Integer)
    # The matched substring (match.group()).
    text = Column(String)

# Create the tables for every model registered on Base; tables that already
# exist in the database are left as they are.
Base.metadata.create_all(engine)

# Loading clinical notes and applying lemmatization and negation detection
# NOTE(review): ScispaCy, SpacyLemmatizer and Negex are imported from scispacy
# at the top of this cell; their APIs are not visible here — verify that these
# classes and the lemmatize() / get_negated() calls exist in the installed
# version before relying on this cell.
nlp = ScispaCy()
lemmatizer = SpacyLemmatizer(nlp)
negex = Negex(nlp)

session = Session()
notes = session.query(Note).all()

for note in notes:
    # First pass: parse the raw note and rebuild it as one space-separated
    # string of per-token lemmatizer outputs.
    doc = nlp(note.text)
    lemmatized_text = ' '.join(map(lemmatizer.lemmatize, doc))

    # Second pass: re-parse the lemma string and collect what negex reports
    # as negated.
    doc = nlp(lemmatized_text)
    negated_phrases = negex.get_negated(doc)

    # Searching for sentences that match the provided regular expressions.
    # NOTE(review): the search runs over doc.text of the re-parsed lemma
    # string, so the stored offsets presumably index that string, not
    # note.text — confirm.
    for match in re.finditer(r'((bleed\sAND\sstool)|hemato|poe)', doc.text):
        matched_text = match.group()
        if matched_text in negated_phrases:
            # Hit appears in the negated set: do not store it.
            continue
        annotation = Annotation(
            note_id=note.id,
            start=match.start(),
            end=match.end(),
            text=matched_text,
        )
        session.add(annotation)

# Commit once, after every note has been processed.
session.commit()

# FastAPI application
class AnnotationModel(BaseModel):
    """Response schema for a single annotation returned by the annotations endpoint."""
    note_id: int  # id of the note the annotation belongs to
    start: int    # stored start offset of the matched text
    end: int      # stored end offset of the matched text
    text: str     # the matched text itself

# Application instance; the route decorator below registers its handler on it.
app = FastAPI()

@app.get('/annotations/{note_id}', response_model=List[AnnotationModel])
def get_annotations(note_id: int):
    """Return every stored annotation for the given note.

    Args:
        note_id: Value matched against ``Annotation.note_id``.

    Returns:
        A list of dicts with the keys ``note_id``, ``start``, ``end`` and
        ``text`` (one per matching row); empty when the note has no
        annotations.
    """
    # Open a dedicated session per request: FastAPI runs plain ``def``
    # handlers in a threadpool, and a SQLAlchemy Session must not be shared
    # between threads.
    db = Session()
    try:
        rows = db.query(Annotation).filter_by(note_id=note_id).all()
        # Build the payload from the mapped columns explicitly instead of
        # returning ``__dict__``, which also carries SQLAlchemy's internal
        # ``_sa_instance_state`` entry.
        return [
            {
                'note_id': row.note_id,
                'start': row.start,
                'end': row.end,
                'text': row.text,
            }
            for row in rows
        ]
    finally:
        # Always release the connection, even if the query raises.
        db.close()

ModuleNotFoundError: No module named 'fastapi'

In [9]:
# Preview the first five rows of the loaded notes DataFrame.
# NOTE(review): relies on `data` from the first cell, which shows no execution
# count — run that cell first. Also check that the displayed rows are safe to
# share before committing this notebook.
data.head()

Unnamed: 0,patient_id,doc_id,text_date,text_id,text_sequence,text,text_tag_1,text_tag_2,text_tag_3,text_tag_4,text_tag_5
0,35375964,27880768,2013-01-09,9000001131102020,1,Reason for Call:\n· Service: Thoracic Medical...,"Telephone/Electronic Communication, Nursing-Sy...","Albanese, Patricia ...",ClinDoc,2013-01-07,1
1,35286331,27881572,2013-01-09,9000001193102020,2,Chief Complaint:\nPatient presents for follow-...,Follow Up - Dermatology,"LACOUTURE MD,MARIO E ...",ClinDoc,2013-01-09,1
2,35363527,27899786,2013-01-14,9000001131102020,3,Reason for Call:\n· Service: Thoracic Medical...,"Telephone/Electronic Communication, Nursing-In...","Culkin, Ann ...",ClinDoc,2013-01-08,1
3,35369076,27903206,2013-01-15,9000001131102020,4,Reason for Call:\n· Service: Thoracic Medical...,"Telephone/Electronic Communication, Nursing-Sy...","English, Maureen ...",ClinDoc,2013-01-04,1
4,35361521,27904147,2013-01-15,9000001106102020,5,CHIEF COMPLAINT: \r\n· \tChief Complaint: anxi...,Follow Up Note - Psychiatry,"KEY MD,RICHARD G ...",ClinDoc,2013-01-15,1
