In [1]:
import pandas as pd
import random

# Load the appointment records. The code below reads these columns from each
# row: 'Age', 'Gender', 'No-show', 'Hipertension', 'Diabetes', 'Alcoholism',
# 'Handcap' (spelled exactly as written here) and, for the preview, 'PatientId'.
df = pd.read_csv('dataset1.csv') 

# Base reasons that are eligible for every patient, regardless of age or gender.
# simulate_notes() extends this pool with the gender- and age-specific reasons
# before picking one at random for rows marked as a no-show.
general_no_show_reasons = [
    "Patient reported forgetting the appointment.",
    "Missed due to lack of transportation.",
    "Appointment was too early in the morning.",
    "Family emergency prevented attendance.",
    "Patient confused the appointment date.",
    "Patient felt better and didn't think follow-up was necessary.",
    "Couldn't get time off from work.",
    "Patient decided to try alternative treatment.",
    "Bad weather on appointment day.",
    "Patient was anxious about the visit.",
]

# Age-specific no-show reasons
def get_age_based_reasons(age):
    """Return the extra no-show reasons that apply to a patient's age group.

    Args:
        age: Patient age; compared numerically against 18 and 60.

    Returns:
        A freshly built list of reason strings: two entries for ages below
        18, three entries for ages of 60 and above, and an empty list for
        every other value.
    """
    minor_reasons = [
        "Minor dependent on guardian who couldn’t bring them.",
        "School-related activities caused scheduling conflict.",
    ]
    senior_reasons = [
        "Patient reported mobility issues.",
        "Health deterioration on the day of appointment.",
        "Elderly patient forgot the appointment time.",
    ]

    if age < 18:
        return minor_reasons
    return senior_reasons if age >= 60 else []

# Gender-specific reasons (F = female, M = male, others treated as general)
def get_gender_based_reasons(gender):
    """Return the extra no-show reasons associated with a gender code.

    The code is matched case-insensitively after trimming surrounding
    whitespace, so ``' f '`` and ``'F'`` are equivalent.

    Args:
        gender: Gender code, normally the string ``'F'`` or ``'M'``. Any
            other value, including a non-string such as ``None`` or NaN,
            is treated as "no gender-specific reasons".

    Returns:
        A freshly built list with two reason strings for ``'F'`` or ``'M'``,
        otherwise an empty list.
    """
    # A missing cell is not a string (pandas typically yields a float NaN,
    # or None), and calling .strip() on it would raise AttributeError and
    # abort the whole row-wise apply. Fall back to the general case instead.
    if not isinstance(gender, str):
        return []

    code = gender.strip().upper()
    if code == 'F':
        return [
            "Patient had childcare responsibilities.",
            "Couldn't leave home due to household duties.",
        ]
    if code == 'M':
        return [
            "Patient reported high workload at job.",
            "Missed due to extended shift or overtime.",
        ]
    return []

# Medical notes generation
def generate_medical_notes(row):
    """Build the clinical-notes text for one appointment record.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'Hipertension', 'Diabetes', 'Alcoholism' and 'Handcap'.

    Returns:
        One string with a sentence group for each condition flag equal to 1
        (in the order hypertension, diabetes, alcoholism), followed by a
        handicap sentence when 'Handcap' is greater than 0, all joined by
        single spaces. When nothing applies, a fixed "no chronic conditions"
        message is returned instead.
    """
    # (column, note) pairs, listed in the order the notes must appear.
    condition_notes = (
        (
            'Hipertension',
            "Patient has a known history of hypertension. Prescribed Amlodipine 5mg daily. Advised salt intake reduction.",
        ),
        (
            'Diabetes',
            "Type 2 diabetes under monitoring. Continued Metformin 500mg BID. Scheduled next HbA1c test in 3 months.",
        ),
        (
            'Alcoholism',
            "Patient reports ongoing alcohol consumption. Provided brief intervention and referral to support group.",
        ),
    )
    parts = [text for column, text in condition_notes if row[column] == 1]

    handicap_level = row['Handcap']
    if handicap_level > 0:
        parts.append(f"Patient reports handicap level {handicap_level}. Needs assistance on visit.")

    if not parts:
        return "No ongoing chronic conditions noted. General checkup advised."
    return " ".join(parts)

# Generate patient sentiment notes based on health and demographic factors
def generate_patient_sentiment(row):
    """Compose the patient-sentiment text for one appointment record.

    Args:
        row: Mapping-like record providing 'Diabetes', 'Hipertension',
            'Alcoholism' and 'Age'. 'No-show' is read only when at least
            one of the three condition flags equals 1.

    Returns:
        Sentences joined by single spaces. Each condition flag equal to 1
        contributes its base sentence, plus a second sentence when the
        'No-show' value (stringified, trimmed, lower-cased) is 'yes'. An
        age-group sentence is added for ages below 18 or of 60 and above.
        If nothing was added, a single "hopeful" sentence is returned.
    """

    def missed_appointment():
        # Evaluated lazily so 'No-show' is only looked up for rows that
        # actually have a flagged condition.
        return str(row['No-show']).strip().lower() == 'yes'

    # (column, base sentence, additional sentence for no-shows), in output order.
    condition_sentiments = (
        (
            'Diabetes',
            "Patient experiences stress and anxiety managing blood sugar levels and dietary restrictions.",
            "Patient feels overwhelmed and confused by frequent diabetes appointments, leading to missed visits.",
        ),
        (
            'Hipertension',
            "Patient expresses fear and anxiety about high blood pressure and possible complications.",
            "Patient is fearful of medication side effects and feels hopeless about long-term control, contributing to avoidance of follow-ups.",
        ),
        (
            'Alcoholism',
            "Patient feels stigma, stress, and anxiety discussing alcohol use with healthcare staff.",
            "Patient avoids appointments due to fear of judgment and hopelessness about recovery.",
        ),
    )

    lines = []
    for column, base_text, no_show_text in condition_sentiments:
        if row[column] != 1:
            continue
        lines.append(base_text)
        if missed_appointment():
            lines.append(no_show_text)

    age = row['Age']
    if age < 18:
        lines.append("Patient (minor) is anxious and fearful about medical procedures, sometimes confused by instructions, and stressed by separation from family.")
    elif age >= 60:
        lines.append("Elderly patient expresses fear of declining health, confusion about medications, and stress related to mobility issues.")

    if lines:
        return " ".join(lines)
    # Nothing specific applied: fall back to the hopeful note.
    return "Patient is hopeful and shows no significant anxiety, stress, or fear related to health conditions."

# Simulate notes including conditional reasons
def simulate_notes(row):
    """Attach simulated notes, sentiment and a no-show reason to one record.

    Args:
        row: Record supporting item access and assignment (a DataFrame row
            when used with ``df.apply(..., axis=1)``). It is modified in
            place: 'PatientNotes', 'PatientSentiment' and 'NoShowReason'
            are set on it.

    Returns:
        The same ``row`` object, with the three fields added.
    """
    # Candidate pool: the general reasons, then gender-specific, then age-specific.
    candidate_reasons = [
        *general_no_show_reasons,
        *get_gender_based_reasons(row['Gender']),
        *get_age_based_reasons(row['Age']),
    ]

    row['PatientNotes'] = generate_medical_notes(row)
    row['PatientSentiment'] = generate_patient_sentiment(row)

    # Only rows flagged as no-shows get a randomly drawn reason.
    was_no_show = str(row['No-show']).strip().lower() == 'yes'
    row['NoShowReason'] = random.choice(candidate_reasons) if was_no_show else "N/A"
    return row

# Apply to dataset
# Run the simulation on every record (row-wise apply)
df = df.apply(simulate_notes, axis='columns')

# Persist the augmented dataset without the index column
df.to_csv('dataset.csv', index=False)

# Preview the first ten records, restricted to the columns of interest
print(df.head(10)[['PatientId', 'Age', 'Gender', 'PatientNotes', 'NoShowReason', 'PatientSentiment']])


      PatientId  Age Gender  \
0  2.987250e+13   62      F   
1  5.589978e+14   56      M   
2  4.262962e+12   62      F   
3  8.679512e+11    8      F   
4  8.841186e+12   56      F   
5  9.598513e+13   76      F   
6  7.336882e+14   23      F   
7  3.449833e+12   39      F   
8  5.639473e+13   21      F   
9  7.812456e+13   19      F   

                                        PatientNotes  \
0  Patient has a known history of hypertension. P...   
1  No ongoing chronic conditions noted. General c...   
2  No ongoing chronic conditions noted. General c...   
3  No ongoing chronic conditions noted. General c...   
4  Patient has a known history of hypertension. P...   
5  Patient has a known history of hypertension. P...   
6  No ongoing chronic conditions noted. General c...   
7  No ongoing chronic conditions noted. General c...   
8  No ongoing chronic conditions noted. General c...   
9  No ongoing chronic conditions noted. General c...   

                                NoShowRea