**MODULE 1**

1. Load the data
Read the patient data from 'Hospital_patients_datasets.csv' using the pandas library and return the dataset for further analysis.

In [None]:
import numpy as np
import pandas as pd


def read_csv():
    """Load the hospital patients dataset from disk.

    Reads "Hospital_patients_datasets.csv" with pandas; the path is relative,
    so it is resolved against the current working directory.

    Returns:
        pandas.DataFrame: The contents of the CSV file.
    """
    source_file = "Hospital_patients_datasets.csv"
    patients = pd.read_csv(source_file)
    return patients

In [None]:
def check_duplicates():
    """Count the rows of the raw dataset that are exact duplicates.

    Returns:
        The number of rows flagged by ``DataFrame.duplicated(keep=False)``.
        With ``keep=False`` every member of a duplicate group is counted,
        including its first occurrence.
    """
    patients = read_csv()
    # keep=False marks all occurrences of a repeated row, not only the later ones.
    duplicate_mask = patients.duplicated(keep=False)
    return duplicate_mask.sum()


In [None]:
def check_null_values():
    """Count the missing values in each column of the raw dataset.

    Returns:
        pandas.Series: Indexed by column name, holding the number of
        null (NaN/None) entries found in that column.
    """
    patients = read_csv()
    missing_mask = patients.isna()
    return missing_mask.sum()

In [None]:
def converting_dtype():
    """Convert the 'ScheduledDay' and 'AppointmentDay' columns to datetimes.

    Both columns are parsed with the format "%Y-%m-%dT%H:%M:%SZ" and then
    normalized, which resets the time-of-day component to midnight.

    Returns:
        pandas.DataFrame: The raw dataset with both columns converted.
    """
    patients = read_csv()
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    for column in ('ScheduledDay', 'AppointmentDay'):
        parsed = pd.to_datetime(patients[column], format=timestamp_format, utc=False)
        # Only the calendar date is kept; the time part is set to 00:00:00.
        patients[column] = parsed.dt.normalize()
    return patients

5. Renaming the columns
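The rename_columns() function renames specific columns ('Hipertension' to 'Hypertension', 'Handcap' to 'Handicap', 'SMS_received' to 'SMSRecevied', 'No-show' to 'NoShow') and returns the modified dataset. Note that the new SMS column name is spelled 'SMSRecevied', matching the column list of the 'patients' table.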

In [None]:
def rename_columns():
    """Rename the awkwardly named columns of the date-converted dataset.

    Returns:
        pandas.DataFrame: A DataFrame in which 'Hipertension', 'Handcap',
        'SMS_received' and 'No-show' carry their new names.
    """
    # NOTE: 'SMSRecevied' is spelled exactly as in the column list of the
    # 'patients' table, so it is kept as is.
    new_names = {
        'Hipertension': 'Hypertension',
        'Handcap': 'Handicap',
        'SMS_received': 'SMSRecevied',
        'No-show': 'NoShow',
    }
    patients = converting_dtype()
    return patients.rename(columns=new_names)

**MODULE 2**

1. Drop the unwanted columns
Drop the columns 'PatientId', 'AppointmentID', and 'Neighbourhood' from the DataFrame, and return the modified DataFrame with these columns removed.

In [None]:
import numpy as np
import pandas as pd
import module1 as m1


def drop_columns():
    """Remove the identifier and neighbourhood columns from the dataset.

    Returns:
        pandas.DataFrame: The renamed dataset without the 'PatientId',
        'AppointmentID' and 'Neighbourhood' columns.
    """
    unwanted = ['PatientId', 'AppointmentID', 'Neighbourhood']
    patients = m1.rename_columns()
    return patients.drop(columns=unwanted)

2. Drop the rows where the value in the 'Age' column is equal to 0. Then generate the age group labels (given) and categorize the 'Age' values into bins with 20-year intervals, using the pd.cut() function from the pandas library to create an 'Age_group' column in the DataFrame.

In [None]:
def create_bin():
    """Drop rows with Age == 0 and bucket the remaining ages into groups.

    The bin edges are 1, 21, 41, ..., 121 and every interval is closed on the
    left and open on the right (``right=False``). The labels are built from
    each bin's two edges: '1 - 21', '21 - 41', ..., '101 - 121'. Ages that
    fall outside all bins get a missing value in 'Age_group'.

    Returns:
        pandas.DataFrame: The filtered dataset with an added categorical
        'Age_group' column.
    """
    ds = drop_columns()

    # Take an explicit copy of the filtered rows: adding a column to the
    # result of a boolean selection otherwise triggers pandas'
    # SettingWithCopyWarning (chained assignment).
    ds = ds[ds['Age'] != 0].copy()

    # One label per bin, e.g. '1 - 21', '21 - 41', ...
    labels = ["{0} - {1}".format(i, i + 20) for i in range(1, 118, 20)]
    bin_edges = range(1, 130, 20)

    ds['Age_group'] = pd.cut(ds['Age'], bins=bin_edges, labels=labels, right=False)
    return ds

In [None]:
def drop():
    """Remove the original 'Age' column once the age groups exist.

    Returns:
        pandas.DataFrame: The binned dataset without the 'Age' column.
    """
    binned = create_bin()
    return binned.drop(columns='Age')


Convert the 'NoShow' column into a binary format

In [None]:
def convert():
    """Turn the 'NoShow' column into binary flags.

    Returns:
        pandas.DataFrame: The dataset with 'NoShow' set to 1 where the
        original value was 'Yes' and to 0 for every other value.
    """
    def _to_flag(answer):
        # Anything other than the exact string 'Yes' counts as 0.
        if answer == 'Yes':
            return 1
        return 0

    patients = drop()
    patients['NoShow'] = patients['NoShow'].apply(_to_flag)
    return patients


In [None]:
def export_the_dataset():
    """Export the cleaned dataset to 'patients.csv' and return it.

    The file is written without the index column (``index=False``).

    Returns:
        pandas.DataFrame: The cleaned dataset that was written to disk.
    """
    df = convert()
    # DataFrame.to_csv returns None when given a path, so its result must not
    # be assigned back to df; otherwise the function would return None.
    df.to_csv('patients.csv', index=False)
    return df


Generate tables using the cleaned dataset:
 Export the new dataset as 'patients.csv'

Using the MySQL database information provided, log in manually, import the cleaned dataset, and create a table named 'patients' that contains the columns below.

Gender, ScheduledDay, AppointmentDay, Scholarship, Hypertension, Diabetes, Alcoholism, Handicap, SMSRecevied, NoShow, Age_group
