### This code generates an Excel file with multiple sheets of synthetic hospital data for Indian people

First, install the `faker` package (the Excel export below also requires `pandas` and `openpyxl`):

In [None]:
!pip install faker

Then run the code below (customize it as needed).

In [4]:
import pandas as pd
from faker import Faker
import random

# Module-level Faker instance shared by every generate_* function in this cell.
fake = Faker('en_IN')  # Use Indian locale for names and addresses

# Function to generate synthetic data for the Patient table
def generate_patient_data(num_rows):
    """Build the synthetic Patient table.

    Args:
        num_rows: Number of patient rows to generate.

    Returns:
        pandas.DataFrame with the columns ``PatientID`` (1..num_rows),
        ``Name``, ``DateOfBirth`` (age between 18 and 90), ``Gender``
        ('Male' or 'Female', picked at random) and ``ContactNumber``.
    """
    row_range = range(num_rows)

    # Columns are produced one at a time, in the same order they appear
    # in the resulting frame.
    patient_ids = [position + 1 for position in row_range]
    names = [fake.name() for _ in row_range]
    birth_dates = [
        fake.date_of_birth(minimum_age=18, maximum_age=90) for _ in row_range
    ]
    genders = [random.choice(['Male', 'Female']) for _ in row_range]
    phone_numbers = [fake.phone_number() for _ in row_range]

    return pd.DataFrame({
        'PatientID': patient_ids,
        'Name': names,
        'DateOfBirth': birth_dates,
        'Gender': genders,
        'ContactNumber': phone_numbers,
    })

# Function to generate synthetic data for the Doctor table
def generate_doctor_data(num_rows, specializations=None):
    """Build the synthetic Doctor table.

    The ``Specialization`` column is drawn from a list of medical
    specialties. (``fake.job()`` was used previously, which produces
    arbitrary occupations unrelated to medicine.)

    Args:
        num_rows: Number of doctor rows to generate.
        specializations: Optional iterable of specialty names to sample
            from. Defaults to a built-in list of common medical specialties.

    Returns:
        pandas.DataFrame with the columns ``DoctorID`` (1..num_rows),
        ``Name``, ``Specialization`` and ``ContactNumber``.

    Raises:
        ValueError: If ``specializations`` is empty while ``num_rows`` > 0.
    """
    if specializations is None:
        specializations = [
            'Cardiology', 'Neurology', 'Orthopedics', 'Pediatrics',
            'Dermatology', 'Oncology', 'Radiology', 'Psychiatry',
            'General Medicine', 'General Surgery', 'Gynecology',
            'Ophthalmology', 'ENT', 'Urology', 'Anesthesiology',
        ]
    else:
        # Materialize so random.choice can index into any iterable.
        specializations = list(specializations)

    if num_rows > 0 and not specializations:
        raise ValueError("specializations must not be empty")

    data = {
        'DoctorID': list(range(1, num_rows + 1)),
        'Name': [fake.name() for _ in range(num_rows)],
        'Specialization': [random.choice(specializations) for _ in range(num_rows)],
        'ContactNumber': [fake.phone_number() for _ in range(num_rows)],
    }
    return pd.DataFrame(data)

# Function to generate synthetic data for the Nurse table
def generate_nurse_data(num_rows, departments=None):
    """Build the synthetic Nurse table.

    The ``Department`` column is drawn from a list of hospital
    departments. (``fake.job()`` was used previously, which produces
    arbitrary job titles rather than departments.)

    Args:
        num_rows: Number of nurse rows to generate.
        departments: Optional iterable of department names to sample from.
            Defaults to a built-in list of common hospital departments.

    Returns:
        pandas.DataFrame with the columns ``NurseID`` (1..num_rows),
        ``Name``, ``Department`` and ``ContactNumber``.

    Raises:
        ValueError: If ``departments`` is empty while ``num_rows`` > 0.
    """
    if departments is None:
        departments = [
            'Emergency', 'Intensive Care Unit', 'Pediatrics', 'Maternity',
            'Surgery', 'Oncology', 'Cardiology', 'Orthopedics',
            'Outpatient', 'General Ward',
        ]
    else:
        # Materialize so random.choice can index into any iterable.
        departments = list(departments)

    if num_rows > 0 and not departments:
        raise ValueError("departments must not be empty")

    data = {
        'NurseID': list(range(1, num_rows + 1)),
        'Name': [fake.name() for _ in range(num_rows)],
        'Department': [random.choice(departments) for _ in range(num_rows)],
        'ContactNumber': [fake.phone_number() for _ in range(num_rows)],
    }
    return pd.DataFrame(data)

# Function to generate synthetic data for the Appointment table
def generate_appointment_data(num_rows, num_patients, num_doctors, num_nurses):
    """Build the synthetic Appointment table.

    Args:
        num_rows: Number of appointment rows to generate.
        num_patients: Highest valid PatientID; IDs are drawn from 1..num_patients.
        num_doctors: Highest valid DoctorID; IDs are drawn from 1..num_doctors.
        num_nurses: Highest valid NurseID; IDs are drawn from 1..num_nurses.

    Returns:
        pandas.DataFrame with the columns ``AppointmentID`` (1..num_rows),
        ``Date``, ``Time``, ``Location``, ``PatientID``, ``DoctorID`` and
        ``NurseID``.
    """
    def random_ids(upper_bound):
        # One random foreign key per appointment row, inclusive of both ends.
        return [random.randint(1, upper_bound) for _ in range(num_rows)]

    columns = {}
    columns['AppointmentID'] = [index + 1 for index in range(num_rows)]
    columns['Date'] = [fake.date_this_month() for _ in range(num_rows)]
    columns['Time'] = [fake.time() for _ in range(num_rows)]
    columns['Location'] = [fake.address() for _ in range(num_rows)]
    columns['PatientID'] = random_ids(num_patients)
    columns['DoctorID'] = random_ids(num_doctors)
    columns['NurseID'] = random_ids(num_nurses)
    return pd.DataFrame(columns)

# Function to generate synthetic data for the Medical Record table
def generate_medical_record_data(num_rows, num_patients):
    """Build the synthetic Medical Record table.

    Args:
        num_rows: Number of medical-record rows to generate.
        num_patients: Highest valid PatientID; IDs are drawn from 1..num_patients.

    Returns:
        pandas.DataFrame with the columns ``RecordID`` (1..num_rows),
        ``Date``, ``Diagnosis``, ``Prescription`` and ``PatientID``.
        Diagnosis and Prescription are Faker-generated sentences.
    """
    row_range = range(num_rows)

    record_ids = [position + 1 for position in row_range]
    record_dates = [fake.date_this_month() for _ in row_range]
    diagnoses = [fake.sentence() for _ in row_range]
    prescriptions = [fake.sentence() for _ in row_range]
    owner_ids = [random.randint(1, num_patients) for _ in row_range]

    return pd.DataFrame({
        'RecordID': record_ids,
        'Date': record_dates,
        'Diagnosis': diagnoses,
        'Prescription': prescriptions,
        'PatientID': owner_ids,
    })

# Set the desired number of rows for each table
num_rows_patient, num_rows_doctor, num_rows_nurse = 150, 20, 15
num_rows_appointment, num_rows_medical_record = 150, 120

# Build every table; appointments and medical records receive the sizes of
# the tables they reference so their foreign keys stay in range.
patient_data = generate_patient_data(num_rows_patient)
doctor_data = generate_doctor_data(num_rows_doctor)
nurse_data = generate_nurse_data(num_rows_nurse)
appointment_data = generate_appointment_data(
    num_rows_appointment,
    num_rows_patient,
    num_rows_doctor,
    num_rows_nurse,
)
medical_record_data = generate_medical_record_data(
    num_rows_medical_record,
    num_rows_patient,
)

# Write one worksheet per table into a single workbook, without the index.
sheets = {
    'Patient': patient_data,
    'Doctor': doctor_data,
    'Nurse': nurse_data,
    'Appointment': appointment_data,
    'MedicalRecord': medical_record_data,
}
with pd.ExcelWriter('healthcare_data_indian.xlsx', engine='openpyxl') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print("Synthetic data generated and exported to healthcare_data_indian.xlsx.")


Synthetic data generated and exported to healthcare_data_indian.xlsx.


### The code below generates a separate CSV file for each table

In [1]:
import pandas as pd
from faker import Faker
import random

# Module-level Faker instance shared by every generate_* function in this cell.
fake = Faker('en_IN')  # Use Indian locale for names and addresses



# Function to generate synthetic data for the Patient table
def generate_patient_data(num_rows):
    """Build the synthetic Patient table.

    Args:
        num_rows: Number of patient rows to generate.

    Returns:
        pandas.DataFrame with the columns ``PatientID`` (1..num_rows),
        ``Name``, ``DateOfBirth`` (age between 18 and 90), ``Gender``
        ('Male' or 'Female', picked at random), ``City`` and
        ``ContactNumber``.
    """
    row_range = range(num_rows)

    # Columns are produced one at a time, in the same order they appear
    # in the resulting frame.
    patient_ids = [position + 1 for position in row_range]
    names = [fake.name() for _ in row_range]
    birth_dates = [
        fake.date_of_birth(minimum_age=18, maximum_age=90) for _ in row_range
    ]
    genders = [random.choice(['Male', 'Female']) for _ in row_range]
    cities = [fake.city() for _ in row_range]
    phone_numbers = [fake.phone_number() for _ in row_range]

    return pd.DataFrame({
        'PatientID': patient_ids,
        'Name': names,
        'DateOfBirth': birth_dates,
        'Gender': genders,
        'City': cities,
        'ContactNumber': phone_numbers,
    })

# Function to generate synthetic data for the Doctor table
def generate_doctor_data(num_rows, specializations=None):
    """Build the synthetic Doctor table.

    The ``Specialization`` column is drawn from a list of medical
    specialties. (``fake.job()`` was used previously, which produces
    arbitrary occupations unrelated to medicine.)

    Args:
        num_rows: Number of doctor rows to generate.
        specializations: Optional iterable of specialty names to sample
            from. Defaults to a built-in list of common medical specialties.

    Returns:
        pandas.DataFrame with the columns ``DoctorID`` (1..num_rows),
        ``Name``, ``Specialization``, ``ContactNumber`` and ``Salary``
        (random integer between 50000 and 150000).

    Raises:
        ValueError: If ``specializations`` is empty while ``num_rows`` > 0.
    """
    if specializations is None:
        specializations = [
            'Cardiology', 'Neurology', 'Orthopedics', 'Pediatrics',
            'Dermatology', 'Oncology', 'Radiology', 'Psychiatry',
            'General Medicine', 'General Surgery', 'Gynecology',
            'Ophthalmology', 'ENT', 'Urology', 'Anesthesiology',
        ]
    else:
        # Materialize so random.choice can index into any iterable.
        specializations = list(specializations)

    if num_rows > 0 and not specializations:
        raise ValueError("specializations must not be empty")

    data = {
        'DoctorID': list(range(1, num_rows + 1)),
        'Name': [fake.name() for _ in range(num_rows)],
        # Commas are still swapped for semicolons, as before, so that
        # caller-supplied values stay comma-free in the exported CSV.
        'Specialization': [
            random.choice(specializations).replace(',', ';') for _ in range(num_rows)
        ],
        'ContactNumber': [fake.phone_number() for _ in range(num_rows)],
        'Salary': [fake.random_int(min=50000, max=150000) for _ in range(num_rows)],
    }
    return pd.DataFrame(data)

# Function to generate synthetic data for the Nurse table
def generate_nurse_data(num_rows, departments=None):
    """Build the synthetic Nurse table.

    The ``Department`` column is drawn from a list of hospital
    departments. (``fake.job()`` was used previously, which produces
    arbitrary job titles rather than departments.)

    Args:
        num_rows: Number of nurse rows to generate.
        departments: Optional iterable of department names to sample from.
            Defaults to a built-in list of common hospital departments.

    Returns:
        pandas.DataFrame with the columns ``NurseID`` (1..num_rows),
        ``Name``, ``Department`` and ``ContactNumber``.

    Raises:
        ValueError: If ``departments`` is empty while ``num_rows`` > 0.
    """
    if departments is None:
        departments = [
            'Emergency', 'Intensive Care Unit', 'Pediatrics', 'Maternity',
            'Surgery', 'Oncology', 'Cardiology', 'Orthopedics',
            'Outpatient', 'General Ward',
        ]
    else:
        # Materialize so random.choice can index into any iterable.
        departments = list(departments)

    if num_rows > 0 and not departments:
        raise ValueError("departments must not be empty")

    data = {
        'NurseID': list(range(1, num_rows + 1)),
        'Name': [fake.name() for _ in range(num_rows)],
        # Commas are still swapped for semicolons, as before, so that
        # caller-supplied values stay comma-free in the exported CSV.
        'Department': [
            random.choice(departments).replace(',', ';') for _ in range(num_rows)
        ],
        'ContactNumber': [fake.phone_number() for _ in range(num_rows)],
    }
    return pd.DataFrame(data)

# Function to generate synthetic data for the Appointment table
def generate_appointment_data(num_rows, num_patients, num_doctors, num_nurses):
    """Build the synthetic Appointment table.

    Args:
        num_rows: Number of appointment rows to generate.
        num_patients: Highest valid PatientID; IDs are drawn from 1..num_patients.
        num_doctors: Highest valid DoctorID; IDs are drawn from 1..num_doctors.
        num_nurses: Highest valid NurseID; IDs are drawn from 1..num_nurses.

    Returns:
        pandas.DataFrame with the columns ``AppointmentID`` (1..num_rows),
        ``Date``, ``Time``, ``Location`` (single line, commas removed),
        ``PatientID``, ``DoctorID`` and ``NurseID``.
    """
    def single_line_address():
        # Flatten the multi-line address and drop commas.
        raw_address = fake.address()
        flattened = raw_address.replace('\n', ' ')
        return flattened.replace(',', '')

    def random_ids(upper_bound):
        # One random foreign key per appointment row, inclusive of both ends.
        return [random.randint(1, upper_bound) for _ in range(num_rows)]

    columns = {}
    columns['AppointmentID'] = [index + 1 for index in range(num_rows)]
    columns['Date'] = [fake.date_this_month() for _ in range(num_rows)]
    columns['Time'] = [fake.time() for _ in range(num_rows)]
    columns['Location'] = [single_line_address() for _ in range(num_rows)]
    columns['PatientID'] = random_ids(num_patients)
    columns['DoctorID'] = random_ids(num_doctors)
    columns['NurseID'] = random_ids(num_nurses)
    return pd.DataFrame(columns)

# Function to generate synthetic data for the Medical Record table
def generate_medical_record_data(num_rows, num_patients):
    """Build the synthetic Medical Record table.

    Args:
        num_rows: Number of medical-record rows to generate.
        num_patients: Highest valid PatientID; IDs are drawn from 1..num_patients.

    Returns:
        pandas.DataFrame with the columns ``RecordID`` (1..num_rows),
        ``Date``, ``Diagnosis``, ``Prescription`` and ``PatientID``.
        Diagnosis and Prescription are Faker-generated sentences.
    """
    row_range = range(num_rows)

    record_ids = [position + 1 for position in row_range]
    record_dates = [fake.date_this_month() for _ in row_range]
    diagnoses = [fake.sentence() for _ in row_range]
    prescriptions = [fake.sentence() for _ in row_range]
    owner_ids = [random.randint(1, num_patients) for _ in row_range]

    return pd.DataFrame({
        'RecordID': record_ids,
        'Date': record_dates,
        'Diagnosis': diagnoses,
        'Prescription': prescriptions,
        'PatientID': owner_ids,
    })

# Set the desired number of rows for each table
num_rows_patient, num_rows_doctor, num_rows_nurse = 150, 20, 15
num_rows_appointment, num_rows_medical_record = 150, 120

# Build every table; appointments and medical records receive the sizes of
# the tables they reference so their foreign keys stay in range.
patient_data = generate_patient_data(num_rows_patient)
doctor_data = generate_doctor_data(num_rows_doctor)
nurse_data = generate_nurse_data(num_rows_nurse)
appointment_data = generate_appointment_data(
    num_rows_appointment,
    num_rows_patient,
    num_rows_doctor,
    num_rows_nurse,
)
medical_record_data = generate_medical_record_data(
    num_rows_medical_record,
    num_rows_patient,
)

# Write each table to its own CSV file, without the index column.
csv_outputs = {
    'patient_data.csv': patient_data,
    'doctor_data.csv': doctor_data,
    'nurse_data.csv': nurse_data,
    'appointment_data.csv': appointment_data,
    'medical_record_data.csv': medical_record_data,
}
for csv_name, frame in csv_outputs.items():
    frame.to_csv(csv_name, index=False)

print("Synthetic data generated and exported to CSV files.")


Synthetic data generated and exported to CSV files.
