In [0]:
import pandas as pd

In [0]:
!pip install mysql-connector-python

In [0]:
def read_csv_file(file_path):
  """Load a CSV file into a DataFrame and print how many rows it contains.

  Args:
    file_path: Location of the CSV file; handed straight to ``pd.read_csv``.

  Returns:
    The DataFrame produced by ``pd.read_csv(file_path)``.
  """
  df=pd.read_csv(file_path)
  # len(df) is the number of rows. df.count() would instead print a per-column
  # Series of non-null counts, which is not the single total the message promises.
  print(f'total records in file is -> {len(df)}')

  return df

In [0]:
import mysql.connector
from mysql.connector import Error

def create_table_insert_data(hostname,database,username,password,port,df,tablename,create_table_query,insert_query,batch_size=1000):
  """Create a MySQL table and load a DataFrame into it in batches.

  Opens a connection with ``mysql.connector``, executes ``create_table_query``,
  then sends the rows of ``df`` through ``insert_query`` using ``executemany``,
  committing after every batch. MySQL errors raised by any of these steps are
  caught and printed rather than re-raised; batches committed before a failure
  stay committed.

  Args:
    hostname: Passed to ``mysql.connector.connect`` as ``host``.
    database: Passed to ``mysql.connector.connect`` as ``database``.
    username: Passed to ``mysql.connector.connect`` as ``user``.
    password: Passed to ``mysql.connector.connect`` as ``password``.
    port: Passed to ``mysql.connector.connect`` as ``port``.
    df: DataFrame whose rows are inserted; each row is sent as a tuple of its
      values in column order (the index is not included).
    tablename: Table name, used only in the progress messages.
    create_table_query: SQL statement executed once before any insert.
    insert_query: Parameterised INSERT statement executed for each batch.
    batch_size: Number of rows per ``executemany`` call and per commit.
      Defaults to 1000.

  Raises:
    ValueError: If ``batch_size`` is less than 1.
  """
  if batch_size < 1:
    raise ValueError("batch_size must be at least 1")

  # Bind both names up front: if connect() or cursor() raises, the finally block
  # must still be able to inspect them instead of failing with UnboundLocalError.
  connection = None
  cursor = None
  try:
    connection = mysql.connector.connect(host=hostname, database=database, user=username, password=password, port=port)
    if connection.is_connected():
      print('Connected to MYSQL server successfully!!')
      cursor = connection.cursor()
      print(create_table_query)

      cursor.execute(create_table_query)

      print(f'Table {tablename} created successfully!!!')

      total_records = len(df)

      print(f"Starting data insertion into `{tablename}` in batches of {batch_size} records.")
      for start in range(0, total_records, batch_size):
        end = start + batch_size

        # itertuples(index=False, name=None) already yields plain tuples in
        # column order, which is the shape executemany expects.
        batch_records = list(df.iloc[start:end].itertuples(index=False, name=None))

        cursor.executemany(insert_query, batch_records)
        connection.commit()  # Commit after each batch
        print(f"Inserted records {start + 1} to {min(end, total_records)} successfully.")

      print(f"All {total_records} records inserted successfully into `{tablename}`.")

  except Error as e:
    # Reached for any mysql.connector Error in the try block, not only connect failures.
    print("Error while connecting to MySQL", e)
  finally:
    # Clean up only what was actually opened.
    if connection is not None and connection.is_connected():
      if cursor is not None:
        cursor.close()
      connection.close()
      print("MySQL connection is closed")



In [0]:
# CREATE TABLE statements, one per target table. They are plain string
# literals (no interpolation), so no f-prefix is needed.

# departments: two text columns, primary key DeptID.
create_table_query_departments = """
CREATE TABLE departments (
    DeptID nvarchar(50) NOT NULL,
    Name nvarchar(50) NOT NULL,
    CONSTRAINT PK_departments PRIMARY KEY (DeptID)
);
      """

# encounters: nine NOT NULL columns, primary key EncounterID.
create_table_query_encounters = """
CREATE TABLE encounters (
    EncounterID nvarchar(50) NOT NULL,
    PatientID nvarchar(50) NOT NULL,
    EncounterDate date NOT NULL,
    EncounterType nvarchar(50) NOT NULL,
    ProviderID nvarchar(50) NOT NULL,
    DepartmentID nvarchar(50) NOT NULL,
    ProcedureCode int NOT NULL,
    InsertedDate date NOT NULL,
    ModifiedDate date NOT NULL,
    CONSTRAINT PK_encounters PRIMARY KEY (EncounterID)
);
"""

# patients: ten NOT NULL columns, primary key ID.
create_table_query_patients = """
CREATE TABLE patients (
    ID nvarchar(50) NOT NULL,
    F_Name nvarchar(50) NOT NULL,
    L_Name nvarchar(50) NOT NULL,
    M_Name nvarchar(50) NOT NULL,
    SSN nvarchar(50) NOT NULL,
    PhoneNumber nvarchar(50) NOT NULL,
    Gender nvarchar(50) NOT NULL,
    DOB date NOT NULL,
    Address nvarchar(100) NOT NULL,
    Updated_Date date NOT NULL,
    CONSTRAINT PK_patients PRIMARY KEY (ID)
);
"""

# providers: six NOT NULL columns, primary key ProviderID.
create_table_query_providers = """
CREATE TABLE providers (
    ProviderID nvarchar(50) NOT NULL,
    FirstName nvarchar(50) NOT NULL,
    LastName nvarchar(50) NOT NULL,
    Specialization nvarchar(50) NOT NULL,
    DeptID nvarchar(50) NOT NULL,
    NPI bigint NOT NULL,
    CONSTRAINT PK_providers PRIMARY KEY (ProviderID)
);
"""

# transactions: twenty-one NOT NULL columns, primary key TransactionID.
create_table_query_transactions = """
CREATE TABLE transactions (
    TransactionID nvarchar(50) NOT NULL,
    EncounterID nvarchar(50) NOT NULL,
    PatientID nvarchar(50) NOT NULL,
    ProviderID nvarchar(50) NOT NULL,
    DeptID nvarchar(50) NOT NULL,
    VisitDate date NOT NULL,
    ServiceDate date NOT NULL,
    PaidDate date NOT NULL,
    VisitType nvarchar(50) NOT NULL,
    Amount float NOT NULL,
    AmountType nvarchar(50) NOT NULL,
    PaidAmount float NOT NULL,
    ClaimID nvarchar(50) NOT NULL,
    PayorID nvarchar(50) NOT NULL,
    ProcedureCode int NOT NULL,
    ICDCode nvarchar(50) NOT NULL,
    LineOfBusiness nvarchar(50) NOT NULL,
    MedicaidID nvarchar(50) NOT NULL,
    MedicareID nvarchar(50) NOT NULL,
    InsertDate date NOT NULL,
    ModifiedDate date NOT NULL,
    CONSTRAINT PK_transactions PRIMARY KEY (TransactionID)
);
"""

In [0]:
def insert_into_command(tablename,df):
  """Build a parameterised INSERT statement covering every column of ``df``.

  Args:
    tablename: Name placed after ``INSERT INTO``; interpolated as-is.
    df: DataFrame whose column labels become the column list, in order.

  Returns:
    A string of the form ``INSERT INTO <table> (<c1>, <c2>, ...) VALUES (%s, %s, ...)``
    with one ``%s`` placeholder per column.
  """
  column_names = list(df.columns)
  column_list = ', '.join(column_names)
  # One %s marker per column, in the same order as the column list.
  value_slots = ', '.join('%s' for _ in column_names)
  return f"INSERT INTO {tablename} ({column_list}) VALUES ({value_slots})"

In [0]:
# Three parallel lists: the entries at the same position in each list belong
# to the same table (target table name, source CSV file, CREATE TABLE statement).
tables = [
    "departments",
    "encounters",
    "patients",
    "providers",
    "transactions",
]
files = [
    "hospital2_department_data.csv",
    "hospital2_encounter_data.csv",
    "hospital2_patients_data.csv",
    "hospital2_provider_data.csv",
    "hospital2_transaction_data.csv",
]
create_table_queries = [
    create_table_query_departments,
    create_table_query_encounters,
    create_table_query_patients,
    create_table_query_providers,
    create_table_query_transactions,
]

In [0]:

# Only the tables named here are created and loaded on this run.
tables_to_load = {'patients'}

# NOTE(review): hostname_hospitalB, database_hospitalB, username_hospitalB,
# password_hospitalB and port_hospitalB are not defined in the cells visible
# here — they must already exist in the kernel before this cell runs.
for tablename, file_path, create_table_query in zip(tables, files, create_table_queries):
  # Filter first, so CSV files for tables that are not being loaded are never
  # read and no INSERT statement is built for them.
  if tablename not in tables_to_load:
    continue

  df=read_csv_file(file_path)
  insert_query=insert_into_command(tablename,df)

  create_table_insert_data(hostname_hospitalB,database_hospitalB,username_hospitalB,password_hospitalB,port_hospitalB,df,tablename,create_table_query,insert_query)