# Profiling

For each table or spreadsheet
    <br> a. Check Unique Values
    <br> b. Check Data Types
    <br> c. Check Percentage of Missing Values
    <br> d. Check Percentage of Valid Date Format

In [1]:
from profiling.profile import Profiling
from profiling.extract.extract_db import extract_database
from profiling.extract.extract_db import extract_list_table
from profiling.extract.extract_sheet import extract_sheet

import os
from dotenv import load_dotenv

load_dotenv()

# Define DIR
SRC_POSTGRES_DB = os.getenv("SRC_POSTGRES_DB")

In [3]:
##profiling sheet
# Extract data from spreadsheet
df_people = extract_sheet('../data-pipeline/script/data/people.csv')

people_profiling = Profiling(data = df_people, table_name='people')

# get columns from the table
people_profiling.get_columns()

# Set Profiling Rule
# list check data type (all columns)
data_type_column = people_profiling.get_columns()

#list check unique values
unique_values_column = []

#list check percentage missing values
missing_values_column = ['first_name','last_name','birthplace']

#list check valid date values
valid_date_column = []

# Set Profiling rule to object
people_profiling.selected_columns(data_type_column, unique_values_column, missing_values_column, valid_date_column)

# Create Reporting Profiling
report_maintenance_request = people_profiling.reporting()

{'created_at': '2025-04-16', 'report': {'people_id': {'data_type': 'int64'}, 'object_id': {'data_type': 'object'}, 'first_name': {'data_type': 'object', 'percentage_missing_value': np.float64(0.003969846808022619)}, 'last_name': {'data_type': 'object', 'percentage_missing_value': np.float64(0.001764376359121164)}, 'birthplace': {'data_type': 'object', 'percentage_missing_value': np.float64(87.6123135826103)}, 'affiliation_name': {'data_type': 'object'}}}


#### Profiling Data Spreadsheet

In [None]:
# Extract data from spreadsheet
df_maintenance_request = extract_sheet('maintenance_request')

In [None]:
# create profiling object
maintenance_request_profiling = Profiling(data = df_maintenance_request, table_name='maintenance_request')

In [None]:
# get columns from the table
maintenance_request_profiling.get_columns()

['name', 'serial_number', 'request_date', 'location', 'request_note']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = maintenance_request_profiling.get_columns()

#list check unique values
unique_values_column = ['location','request_note']

#list check percentage missing values
missing_values_column = ['request_date','location','request_note']

#list check valid date values
valid_date_column = ['request_date']

# Set Profiling rule to object
maintenance_request_profiling.selected_columns(data_type_column, unique_values_column, missing_values_column, valid_date_column)


In [None]:
# Create Reporting Profiling
report_maintenance_request = maintenance_request_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'name': {'data_type': 'object'}, 'serial_number': {'data_type': 'object'}, 'request_date': {'data_type': 'object', 'percentage_missing_value': 0.0, 'percentage_valid_date': 100.0}, 'location': {'data_type': 'object', 'unique_value': ['labour room', 'Ophthalmic and ENT', 'OPD', 'General Instruments', 'Operation Theater', 'Ophthalmic and ENTm', 'Operation Theater or Manual'], 'percentage_missing_value': 0.0}, 'request_note': {'data_type': 'object', 'unique_value': ['Equipment inspection', 'Routine maintenance', 'Equipment calibration', 'Equipment cleaning', 'Equipment repair'], 'percentage_missing_value': 0.0}}}


In [None]:
report_maintenance_request

{'created_at': '2024-08-13',
 'report': {'name': {'data_type': 'object'},
  'serial_number': {'data_type': 'object'},
  'request_date': {'data_type': 'object',
   'percentage_missing_value': 0.0,
   'percentage_valid_date': 100.0},
  'location': {'data_type': 'object',
   'unique_value': ['labour room',
    'Ophthalmic and ENT',
    'OPD',
    'General Instruments',
    'Operation Theater',
    'Ophthalmic and ENTm',
    'Operation Theater or Manual'],
   'percentage_missing_value': 0.0},
  'request_note': {'data_type': 'object',
   'unique_value': ['Equipment inspection',
    'Routine maintenance',
    'Equipment calibration',
    'Equipment cleaning',
    'Equipment repair'],
   'percentage_missing_value': 0.0}}}

#### Profiling Data Database Clinic

In [None]:
# Extract list of table in Database Clinic
list_table = extract_list_table(db_name='clinic')
print(list_table)

     table_name
0       patient
1   appointment
2    speciality
3        doctor
4  prescription
5    medication


In [None]:
# Profiling Table patient
df_patient = extract_database('clinic', 'patient')

# create profiling object
patient_profiling = Profiling(data = df_patient, table_name='patient')

# get columns from the table
patient_profiling.get_columns()

['patient_id',
 'name',
 'dob',
 'gender',
 'phone_number',
 'address',
 'state_code',
 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = patient_profiling.get_columns()

#list check unique values
unique_values = ['state_code']

#list check percentage missing values
missing_values = ['phone_number', 'address', 'state_code']

#list check valid date values
valid_date = ['dob']

# Set Profiling rule to object
patient_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_patient = patient_profiling.reporting()
report_patient

{'created_at': '2024-08-13', 'report': {'patient_id': {'data_type': 'int64'}, 'name': {'data_type': 'object'}, 'dob': {'data_type': 'object', 'percentage_valid_date': 100.0}, 'gender': {'data_type': 'object'}, 'phone_number': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'address': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'state_code': {'data_type': 'object', 'unique_value': ['WA', 'NSW', 'NT', 'VIC', 'QLD', 'TAS', 'FO', 'YQ', 'PY', 'ZV', 'RB', 'PU', 'BP', 'LK', 'SV', 'GB', 'HJ', 'JN', 'EC', 'WF', 'SD', 'NY', 'RD', 'LP', 'IU', 'BY', 'JT', 'KH', 'OL', 'VC', 'YG', 'AT', 'UT', 'JA', 'LD', 'EV', 'BD', 'TA', 'OZ', 'CT', 'LB', 'DR', 'FW', 'TL', 'BS', 'PT', 'TI', 'PN', 'QX', 'IJ', 'HV', 'PR', 'WV', 'IQ', 'TR', 'GZ', 'NV', 'EJ', 'NE', 'HB', 'BA', 'AS', 'OP', 'AN', 'UU', 'PG', 'DC', 'YP', 'AH', 'CO', 'MR', 'BX', 'ZN', 'NP', 'PB', 'EW', 'SA', 'WT', 'WL', 'KS', 'CH', 'YF', 'FA', 'AF', 'DN', 'MW', 'JM', 'XI', 'FU', 'MM', 'IF', 'IH', 'CB', 'IA', 'XK', 'FI', 'CV', 'LQ', 

{'created_at': '2024-08-13',
 'report': {'patient_id': {'data_type': 'int64'},
  'name': {'data_type': 'object'},
  'dob': {'data_type': 'object', 'percentage_valid_date': 100.0},
  'gender': {'data_type': 'object'},
  'phone_number': {'data_type': 'object', 'percentage_missing_value': 0.0},
  'address': {'data_type': 'object', 'percentage_missing_value': 0.0},
  'state_code': {'data_type': 'object',
   'unique_value': ['WA',
    'NSW',
    'NT',
    'VIC',
    'QLD',
    'TAS',
    'FO',
    'YQ',
    'PY',
    'ZV',
    'RB',
    'PU',
    'BP',
    'LK',
    'SV',
    'GB',
    'HJ',
    'JN',
    'EC',
    'WF',
    'SD',
    'NY',
    'RD',
    'LP',
    'IU',
    'BY',
    'JT',
    'KH',
    'OL',
    'VC',
    'YG',
    'AT',
    'UT',
    'JA',
    'LD',
    'EV',
    'BD',
    'TA',
    'OZ',
    'CT',
    'LB',
    'DR',
    'FW',
    'TL',
    'BS',
    'PT',
    'TI',
    'PN',
    'QX',
    'IJ',
    'HV',
    'PR',
    'WV',
    'IQ',
    'TR',
    'GZ',
    'NV',
    'E

In [None]:
# Profiling Table doctor
df_doctor = extract_database('clinic', 'doctor')

# create profiling object
doctor_profiling = Profiling(data = df_doctor, table_name='doctor')

# get columns from the table
doctor_profiling.get_columns()

['doctor_id', 'name', 'phone_number', 'speciality_id', 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = doctor_profiling.get_columns()

#list check unique values
unique_values = []

#list check percentage missing values
missing_values = ['phone_number']

#list check valid date values
valid_date = []

# Set Profiling rule to object
doctor_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)


In [None]:
# Create Reporting Profiling
report_doctor = doctor_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'doctor_id': {'data_type': 'int64'}, 'name': {'data_type': 'object'}, 'phone_number': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'speciality_id': {'data_type': 'float64'}, 'created_at': {'data_type': 'datetime64[ns, UTC]'}}}


In [None]:
# Profiling Table specialty
df_specialty = extract_database('clinic', 'speciality')

# create profiling object
specialty_profiling = Profiling(data = df_specialty, table_name='speciality')

# get columns from the table
specialty_profiling.get_columns()

['speciality_id', 'name', 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = specialty_profiling.get_columns()

#list check unique values
unique_values = ['name']

#list check percentage missing values
missing_values = []

#list check valid date values
valid_date = []

# Set Profiling rule to object
specialty_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)


In [None]:
# Create Reporting Profiling
report_specialty = specialty_profiling.reporting()


{'created_at': '2024-08-13', 'report': {'speciality_id': {'data_type': 'int64'}, 'name': {'data_type': 'object', 'unique_value': ['Cardiology', 'Dermatology', 'Endocrinology', 'Gastroenterology', 'Neurology', 'Ophthalmology', 'Pediatrics']}, 'created_at': {'data_type': 'datetime64[ns, UTC]'}}}


In [None]:
# Profiling Table medication
df_medication = extract_database('clinic', 'medication')

# create profiling object
medication_profiling = Profiling(data = df_medication, table_name='medication')

# get columns from the table
medication_profiling.get_columns()


['medication_id',
 'name',
 'manufacturer',
 'dosage_form',
 'strength',
 'description',
 'created_at']

In [None]:
#Set Profiling Rule
# list check data type (all columns)
data_type_column = medication_profiling.get_columns()

#list check unique values
unique_values = ['manufacturer', 'dosage_form', 'strength']

#list check percentage missing values
missing_values = ['manufacturer', 'dosage_form', 'strength', 'description']

#list check valid date values
valid_date = []

# Set Profiling rule to object
medication_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_medication = medication_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'medication_id': {'data_type': 'int64'}, 'name': {'data_type': 'object'}, 'manufacturer': {'data_type': 'object', 'unique_value': ['ABC Pharma', 'XYZ Pharmaceuticals', 'MediCo', 'Pharmalife', 'HealthMeds', 'Wellness Drugs', 'BreathEasy', 'HealPharma', 'AllergyCare', 'HeartGuard', 'CardioHealth', 'ThyroidWell', 'CholesterolControl', 'ReliefMeds', 'MindBalance', 'BloodCare', 'SleepEase', 'KidneyCare', 'PainRelief', 'WaterBalance', 'MicroMed', 'GastroGuard', 'InflammationControl', 'AsthmaCare', 'HeartCare', 'MediCure', 'BreatheEasy', 'HappinessRx', 'MoodStabilize', 'JointRelief', 'StomachEase', 'SleepWell', 'EmotionBalance', 'NeuroCare', 'AnxietyControl', 'SleepAid', 'ThyroidCare', 'AllergyControl', 'AnxietyEase', 'LoveLife', 'AllergyRelief', 'MoodBalance', 'MentalHealth', 'AnxietyRelief', 'CalmEase'], 'percentage_missing_value': 0.0}, 'dosage_form': {'data_type': 'object', 'unique_value': ['Tablet', 'Capsule', 'Inhaler'], 'percentage_missing_value'

In [None]:
# Profiling Table appointment
df_appointment = extract_database('clinic', 'appointment')

# create profiling object
appointment_profiling = Profiling(data = df_appointment, table_name='appointment')

# get columns from the table
appointment_profiling.get_columns()


['appointment_id',
 'patient_id',
 'doctor_id',
 'appointment_date',
 'notes',
 'status',
 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = appointment_profiling.get_columns()

#list check unique values
unique_values = ['status']

#list check percentage missing values
missing_values = appointment_profiling.get_columns()

#list check valid date values
valid_date = ['appointment_date']

# Set Profiling rule to object
appointment_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)


In [None]:
# Create Reporting Profiling
report_appointment = appointment_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'appointment_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'patient_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'doctor_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'appointment_date': {'data_type': 'datetime64[ns]', 'percentage_missing_value': 0.0, 'percentage_valid_date': 100.0}, 'notes': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'status': {'data_type': 'object', 'unique_value': ['completed', None, 'Cancelled'], 'percentage_missing_value': 35.095238095238095}, 'created_at': {'data_type': 'datetime64[ns, UTC]', 'percentage_missing_value': 0.0}}}


In [None]:
# Profiling Table prescription
df_prescription = extract_database('clinic', 'prescription')

# create profiling object
prescription_profiling = Profiling(data = df_prescription, table_name='prescription')

# get columns from the table
prescription_profiling.get_columns()

['prescription_id', 'appointment_id', 'medication_id', 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = prescription_profiling.get_columns()

#list check unique values
unique_values = []

#list check percentage missing values
missing_values = prescription_profiling.get_columns()

#list check valid date values
valid_date = []

# Set Profiling rule to object
prescription_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_prescription = prescription_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'prescription_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'appointment_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'medication_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'created_at': {'data_type': 'datetime64[ns, UTC]', 'percentage_missing_value': 0.0}}}


#### Profiling Database Clinic Ops

In [None]:
# Extract Table from Database Clinic Operation
list_table = extract_list_table(db_name='clinic_ops')
print(list_table)

           table_name
0           equipment
1          speciality
2                role
3              salary
4      leave_requests
5            employee
6  maintenance_record


In [None]:
# Profiling Table employee
df_employee = extract_database('clinic_ops', 'employee')

# create profiling object  
employee_profiling = Profiling(data = df_employee, table_name='employee')

# get columns from the table
employee_profiling.get_columns()

['employee_id',
 'name',
 'phone_number',
 'speciality_id',
 'role_id',
 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = employee_profiling.get_columns()

#list check unique values
unique_values = []

#list check percentage missing values
missing_values = employee_profiling.get_columns()

#list check valid date values
valid_date = []

# Set Profiling rule to object
employee_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)


In [None]:
# Create Reporting Profiling
report_employee = employee_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'employee_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'name': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'phone_number': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'speciality_id': {'data_type': 'float64', 'percentage_missing_value': 89.46428571428572}, 'role_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'created_at': {'data_type': 'datetime64[ns, UTC]', 'percentage_missing_value': 0.0}}}


In [None]:
# Profiling Table speciality
df_speciality = extract_database('clinic_ops', 'speciality')

# create profiling object
speciality_profiling = Profiling(data = df_speciality, table_name='speciality')

# get columns from the table
speciality_profiling.get_columns()

['speciality_id', 'name', 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = speciality_profiling.get_columns()

#list check unique values
unique_values = ['name']

#list check percentage missing values
missing_values = []

#list check valid date values
valid_date = []

# Set Profiling rule to object
speciality_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_speciality = speciality_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'speciality_id': {'data_type': 'int64'}, 'name': {'data_type': 'object', 'unique_value': ['Cardiology', 'Dermatology', 'Endocrinology', 'Gastroenterology', 'Neurology', 'Ophthalmology', 'Pediatrics']}, 'created_at': {'data_type': 'datetime64[ns, UTC]'}}}


In [None]:
# Profiling Table role
df_role = extract_database('clinic_ops', 'role')

# create profiling object
role_profiling = Profiling(data = df_role, table_name='role')

# get columns from the table
role_profiling.get_columns()

['role_id', 'name', 'description', 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = role_profiling.get_columns()

#list check unique values
unique_values = ['name']

#list check percentage missing values
missing_values = []

#list check valid date values
valid_date = []

# Set Profiling rule to object
role_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_role = role_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'role_id': {'data_type': 'int64'}, 'name': {'data_type': 'object', 'unique_value': ['receptionist', 'nurse', 'physician', 'technician', 'administrator', 'doctor']}, 'description': {'data_type': 'object'}, 'created_at': {'data_type': 'datetime64[ns, UTC]'}}}


In [None]:
# Profiling Table salary
df_salary = extract_database('clinic_ops', 'salary')

# create profiling object
salary_profiling = Profiling(data = df_salary, table_name='salary')

# get columns from the table
salary_profiling.get_columns()

['salary_id', 'employee_id', 'amount', 'payment_date', 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = salary_profiling.get_columns()

#list check unique values
unique_values = []

#list check percentage missing values
missing_values = salary_profiling.get_columns()

#list check valid date values
valid_date = ['payment_date']

# Set Profiling rule to object
salary_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_salary = salary_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'salary_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'employee_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'amount': {'data_type': 'float64', 'percentage_missing_value': 0.0}, 'payment_date': {'data_type': 'object', 'percentage_missing_value': 0.0, 'percentage_valid_date': 100.0}, 'created_at': {'data_type': 'datetime64[ns, UTC]', 'percentage_missing_value': 0.0}}}


In [None]:
# Profiling Table leave_request
df_leave_request = extract_database('clinic_ops', 'leave_requests')

# create profiling object
leave_request_profiling = Profiling(data = df_leave_request, table_name='leave_requests')

# get columns from the table
leave_request_profiling.get_columns()


['leave_id',
 'employee_id',
 'leave_type',
 'start_date',
 'end_date',
 'status',
 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = leave_request_profiling.get_columns()

#list check unique values
unique_values = ['leave_type','status']

#list check percentage missing values
missing_values = leave_request_profiling.get_columns()

#list check valid date values
valid_date = ['start_date','end_date']

# Set Profiling rule to object
leave_request_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_leave_request = leave_request_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'leave_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'employee_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'leave_type': {'data_type': 'object', 'unique_value': ['Annual', 'Parental', 'Casual', 'Religous', 'Sick'], 'percentage_missing_value': 0.0}, 'start_date': {'data_type': 'object', 'percentage_missing_value': 0.0, 'percentage_valid_date': 100.0}, 'end_date': {'data_type': 'object', 'percentage_missing_value': 0.0, 'percentage_valid_date': 100.0}, 'status': {'data_type': 'object', 'unique_value': ['Approved', 'Rejected', 'Pending'], 'percentage_missing_value': 0.0}, 'created_at': {'data_type': 'datetime64[ns, UTC]', 'percentage_missing_value': 0.0}}}


In [None]:
# Profiling Table equipment
df_equipment = extract_database('clinic_ops', 'equipment')

# create profiling object
equipment_profiling = Profiling(data = df_equipment, table_name='equipment')

# get columns from the table
equipment_profiling.get_columns()


['equipment_id',
 'name',
 'serial_number',
 'purchase_date',
 'warranty_expiration',
 'location',
 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = equipment_profiling.get_columns()

#list check unique values
unique_values = ['location']

#list check percentage missing values
missing_values = equipment_profiling.get_columns()

#list check valid date values
valid_date = []

# Set Profiling rule to object
equipment_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)

In [None]:
# Create Reporting Profiling
report_equipment = equipment_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'equipment_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'name': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'serial_number': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'purchase_date': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'warranty_expiration': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'location': {'data_type': 'object', 'unique_value': ['General Instruments', 'OPD', 'Operation Theater', 'Operation Theater or Manual', 'Ophthalmic and ENT', 'Ophthalmic and ENTm', 'labour room'], 'percentage_missing_value': 0.0}, 'created_at': {'data_type': 'datetime64[ns, UTC]', 'percentage_missing_value': 0.0}}}


In [None]:
# Profiling Table maintenance_report
df_maintenance_report = extract_database('clinic_ops', 'maintenance_record')

# create profiling object
maintenance_report_profiling = Profiling(data = df_maintenance_report, table_name='maintenance_record')

# get columns from the table
maintenance_report_profiling.get_columns()

['record_id',
 'equipment_id',
 'maintenance_date',
 'description',
 'cost',
 'created_at']

In [None]:
# Set Profiling Rule
# list check data type (all columns)
data_type_column = maintenance_report_profiling.get_columns()

#list check unique values
unique_values = []

#list check percentage missing values
missing_values = maintenance_report_profiling.get_columns()

#list check valid date values
valid_date = ['maintenance_date']

# Set Profiling rule to object
maintenance_report_profiling.selected_columns(data_type_column, unique_values, missing_values, valid_date)


In [None]:
# Create Reporting Profiling
report_maintenance_report = maintenance_report_profiling.reporting()

{'created_at': '2024-08-13', 'report': {'record_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'equipment_id': {'data_type': 'int64', 'percentage_missing_value': 0.0}, 'maintenance_date': {'data_type': 'object', 'percentage_missing_value': 0.0, 'percentage_valid_date': 100.0}, 'description': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'cost': {'data_type': 'object', 'percentage_missing_value': 0.0}, 'created_at': {'data_type': 'datetime64[ns, UTC]', 'percentage_missing_value': 0.0}}}
