In [5]:
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification
import spacy
from spacy import displacy
from collections import Counter

# Load dataset
file = '../datasets/medical_records.csv'
df = pd.read_csv(file)

# Initialise pipelines
# Each checkpoint is passed by name rather than as a pre-built model object:
# `pipeline` can only pick the matching tokenizer automatically when it is
# given an identifier, and it raises if handed a model instance without a
# tokenizer. This also keeps the NER model paired with its own tokenizer
# instead of one taken from a different checkpoint.
summarizer = pipeline("summarization")
ner_model = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
)
sentiment_model = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

# Preprocess function for patient visits
_SPACY_MODEL_NAME = 'en_core_web_sm'
_spacy_nlp = None  # lazily-created spaCy pipeline, shared across calls


def _get_spacy_nlp():
    """Return the spaCy pipeline, loading it from disk only on first use."""
    global _spacy_nlp
    if _spacy_nlp is None:
        _spacy_nlp = spacy.load(_SPACY_MODEL_NAME)
    return _spacy_nlp


def _sentiment_polarity(result):
    """Convert one classifier result into a signed score in [-1, 1].

    Handles both binary ``POSITIVE``/``NEGATIVE`` labels and star-rating
    labels of the form ``"1 star"`` .. ``"5 stars"`` (3 stars maps to 0).
    Labels in neither form contribute 0.
    """
    label = str(result['label']).strip().upper()
    score = float(result['score'])
    if label == 'POSITIVE':
        return score
    if label == 'NEGATIVE':
        return -score
    leading = label.split()[0] if label else ''
    if leading.isdigit():
        # Centre the 1-5 star scale on 3 and scale it to [-1, 1].
        return score * (int(leading) - 3) / 2
    return 0.0


def _unique_in_order(values):
    """Return the distinct non-missing values as strings, in first-seen order."""
    return list(dict.fromkeys(str(v) for v in values if pd.notna(v)))


def summarize_patient_visits(patient_name: str) -> str:
    """Build a plain-text report of every recorded visit for one patient.

    Reads the module-level ``df`` and uses the module-level ``summarizer``
    and ``sentiment_model`` pipelines plus a spaCy pipeline for noun chunks.

    Args:
        patient_name: Value matched exactly against the 'Patient Name' column.

    Returns:
        A multi-line report (visit list, diagnoses, medications, overall
        sentiment, notes summary, most frequent noun chunks), or a fixed
        message when the patient has no rows.
    """
    # Filter data for the given patient name
    patient_data = df[df['Patient Name'] == patient_name]

    if patient_data.empty:
        return "No data available for the specified patient."

    # Sort visits by date
    # NOTE(review): if 'Date Visited' holds strings in a non-ISO format this
    # ordering is lexicographic rather than chronological - confirm the format.
    patient_data = patient_data.sort_values(by='Date Visited')

    visit_details = []
    all_notes = []
    all_diagnoses = []
    all_medications = []
    sentiment_scores = []
    all_topics = []

    # Loaded once per call (and cached across calls), not once per row.
    nlp = _get_spacy_nlp()

    # Collect details from each visit
    for _, row in patient_data.iterrows():
        visit_details.append(
            f"Date: {row['Date Visited']}, Diagnosis: {row['Diagnosis']}, "
            f"Medication: {row['Medication Given']}"
        )
        all_diagnoses.append(row['Diagnosis'])
        all_medications.append(row['Medication Given'])

        # A missing or blank note cannot be analysed; the visit itself is
        # still listed above.
        note = row['Notes']
        if pd.isna(note) or not str(note).strip():
            continue
        note_text = str(note)
        all_notes.append(note_text)

        # Sentiment analysis; truncate so long notes fit the model's input.
        sentiment_scores.append(sentiment_model(note_text, truncation=True)[0])

        # Extract topics (noun chunks) from the note
        all_topics.extend(chunk.text for chunk in nlp(note_text).noun_chunks)

    # Generate summary for notes
    if all_notes:
        notes_summary = summarizer(
            ' '.join(all_notes),
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']
    else:
        notes_summary = "No notes recorded."

    # Average the signed sentiment over all analysed notes
    if sentiment_scores:
        avg_sentiment = (
            sum(_sentiment_polarity(sent) for sent in sentiment_scores)
            / len(sentiment_scores)
        )
        if avg_sentiment > 0:
            sentiment_summary = "Positive"
        elif avg_sentiment < 0:
            sentiment_summary = "Negative"
        else:
            sentiment_summary = "Neutral"
        sentiment_line = (
            "Sentiment Analysis: The overall sentiment of the doctor's notes "
            f"is {sentiment_summary}.\n"
        )
    else:
        sentiment_line = "Sentiment Analysis: No notes available to analyse.\n"

    # most_common yields (topic, count) pairs; only the topic text is shown.
    main_topics = [topic for topic, _ in Counter(all_topics).most_common(5)]

    # Create the final summary
    report_parts = [
        f"Patient: {patient_name}\n",
        f"Total Visits: {len(patient_data)}\n",
        "Visit Details: \n",
        "\n".join(visit_details),
        "\n\n",
        f"Diagnosis Summary: {', '.join(_unique_in_order(all_diagnoses))}\n",
        f"Medication Summary: {', '.join(_unique_in_order(all_medications))}\n\n",
        sentiment_line,
        f"Notes Summary: {notes_summary}\n",
        f"Main Topics: {', '.join(main_topics)}",
    ]
    return "".join(report_parts)

# Example usage
# Guarded so that importing this module does not trigger model inference.
# When executed as a script or as a notebook cell, __name__ is "__main__"
# and the demo runs as before.
if __name__ == "__main__":
    example_patient_name = "Jennifer Anderson"
    summary = summarize_patient_visits(example_patient_name)
    print(summary)

OSError: [WinError 126] The specified module could not be found. Error loading "c:\Users\moeed\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.