In [11]:
import pandas as pd
import re

file_path = 'Patient.xlsx' # I renamed it but was originally called Patient Report - Treatment Plan Sample Data

try:
    df = pd.read_excel(file_path)
    df_11th_column = df.iloc[:, [10]]  
except FileNotFoundError:
    print(f"The file '{file_path}' was not found. Please check the file name and location.")
    df_11th_column = pd.DataFrame()  
except Exception as e:
    print(f"An error occurred: {e}")
    df_11th_column = pd.DataFrame()

df_11th_column.head()


Unnamed: 0,Interventions / Frequency:19
0,PC will meet with pt for individual counseling...
1,Trauma - Tuesdays 9-10:15am\n• Psycho-educatio...
2,PC will meet with client 1 on 1 weekly for 1 h...
3,PC will meet with pt for individual counseling...


In [17]:
def clean_text(text):
    text = re.sub(r'\r\n|\r|\n', '\n', text)
    text = re.sub(r'[ \t]+', ' ', text)
    text = re.sub(r'\n\s*\n', '\n\n', text.strip())
    return text

def cut_after_hours(para):
    match = re.search(r'.*?hours\s*/?\s*week', para, re.IGNORECASE)
    return match.group(0) if match else para


In [21]:
all_cut_paragraphs = []

for index, row in df_11th_column.iterrows():
    text_data = clean_text(row.iloc[0])

    paragraphs = re.split(r'\n\s*\n', text_data)

    filtered_paragraphs = [para for para in paragraphs if re.search(r'hours\s*/?\s*week', para, re.IGNORECASE)]

    cut_paragraphs = [cut_after_hours(para) for para in filtered_paragraphs]
    all_cut_paragraphs.extend(cut_paragraphs)

    if cut_paragraphs:
        print(f"Row {index + 1} - Filtered and Cut Paragraphs Ending After 'hours/week':\n")
        for i, para in enumerate(cut_paragraphs):
            print(f"Filtered and Cut Paragraph {i + 1}:\n{para}\n")


Row 1 - Filtered and Cut Paragraphs Ending After 'hours/week':

Filtered and Cut Paragraph 1:
Yoga MONDAY Mon 9-10:15 am - 1.25 hours/week

Filtered and Cut Paragraph 2:
Acupuncture MONDAY 10:30-11:45 - 1.25 hours/week

Filtered and Cut Paragraph 3:
Spirituality of Imperfection TUESDAY 9-10:15 am - 1.25 hours/week

Filtered and Cut Paragraph 4:
REBT TUESDAY 10:30-11:45 - 1.25 hours/week

Filtered and Cut Paragraph 5:
IOP/OP WEDNESDAY 9-10:15 am - 1.25 hours/week

Filtered and Cut Paragraph 6:
Art Therapy in Recovery WEDNESDAY 10:30-11:45 - 1.25 hours/week

Filtered and Cut Paragraph 7:
Mindfulness Relapse Prevention FRIDAY 9-10:15 am - 1.25 hours/week

Filtered and Cut Paragraph 8:
Alone-liness Alleviation FRIDAY 10:30-11:45 - 1.25 hours/week

Row 4 - Filtered and Cut Paragraphs Ending After 'hours/week':

Filtered and Cut Paragraph 1:
Trauma 9-10:15 am - 1.25 hours/week

Filtered and Cut Paragraph 2:
Harmony Hope and Healing Tuesday 10:30-11:45 - 1.25 hours/week

Filtered and Cut Para

In [None]:
from transformers import pipeline

pipe = pipeline("token-classification", model ="Clinical-AI-Appolo/Meedical-NER", aggregation_strategy='simple')

In [26]:
pattern = re.compile(
    r'^(.*?)(MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY)\s+([0-9:-]+\s*[APMapm]*?)\s*-\s*([0-9.]+\s*hours/week)',
    re.IGNORECASE
)

programs = []
weekdays = []
times = []
hours_per_week = []

for para in all_cut_paragraphs:
    match = pattern.search(para.strip())
    if match:
        programs.append(match.group(1).strip())
        weekdays.append(match.group(2).capitalize())
        times.append(match.group(3).strip())
        hours_per_week.append(match.group(4).strip())


In [23]:
df_program_details = pd.DataFrame({
    'Program Name': programs,
    'Weekday': weekdays,
    'Time': times,
    'Hours per Week': hours_per_week
})

print("\nExtracted Program Details:\n", df_program_details)


Extracted Program Details:
                      Program Name    Weekday         Time   Hours per Week
0                     Acupuncture     Monday  10:30-11:45  1.25 hours/week
1    Spirituality of Imperfection    Tuesday   9-10:15 am  1.25 hours/week
2                            REBT    Tuesday  10:30-11:45  1.25 hours/week
3                          IOP/OP  Wednesday   9-10:15 am  1.25 hours/week
4         Art Therapy in Recovery  Wednesday  10:30-11:45  1.25 hours/week
5  Mindfulness Relapse Prevention     Friday   9-10:15 am  1.25 hours/week
6        Alone-liness Alleviation     Friday  10:30-11:45  1.25 hours/week
7        Harmony Hope and Healing    Tuesday  10:30-11:45  1.25 hours/week
8  Mindfulness Relapse Prevention     Friday   9-10:15 am  1.25 hours/week
