In [9]:
import pandas as pd
import sqlite3
import os

# File locations, relative to the notebook's working directory
clinic_data = os.path.join("data", "ALL_CLINICAL_DATA.xlsx")
dmd_data = os.path.join("data", "DMD_Data.xlsx")
sqlite_db = os.path.join("database", "clinical_data.db")


# Load each workbook into a DataFrame using read_excel's defaults.
# NOTE(review): the recorded preview of clinical_table shows "unnamed:_N"
# column labels and the real labels ("Kit", "Kit Type", ...) sitting in a data
# row, so the default header row is not the sheet's true header - confirm
# whether a `header=` offset should be passed here before relying on the table.
df_clinical = pd.read_excel(clinic_data)
df_dmd = pd.read_excel(dmd_data)

# Normalise column labels only (no cell values are touched): trim surrounding
# whitespace, turn spaces into underscores, and lower-case everything so the
# labels are convenient to reference from SQL.
df_clinical.columns = df_clinical.columns.str.strip().str.replace(" ", "_").str.lower()
df_dmd.columns = df_dmd.columns.str.strip().str.replace(" ", "_").str.lower()

# sqlite3.connect() creates the database file on demand but not its parent
# folder, so make sure the folder exists first.
os.makedirs(os.path.dirname(sqlite_db), exist_ok=True)

# Connect to SQLite; try/finally guarantees the handle is released even if a
# write or preview query below raises.
conn = sqlite3.connect(sqlite_db)
try:
    # Saving both dataframes to the database as separate tables
    # (if_exists="replace" drops and recreates a table that already exists)
    df_clinical.to_sql("clinical_table", conn, if_exists="replace", index=False)
    df_dmd.to_sql("dmd_table", conn, if_exists="replace", index=False)

    # Previewing the data as stored, read back through SQL
    print("Clinical table preview:")
    print(pd.read_sql_query("SELECT * FROM clinical_table LIMIT 5", conn))

    print("\nDMD table preview:")
    print(pd.read_sql_query("SELECT * FROM dmd_table LIMIT 5", conn))
finally:
    # Closing the connection before starting more data cleaning
    conn.close()


Clinical table preview:
          unnamed:_0 unnamed:_1                   unnamed:_2    unnamed:_3  \
0  ALL CLINICAL DATA       None                         None          None   
1               None       None                         None          None   
2               None       None                         None          None   
3                Kit   Kit Type         Kit.Type Description  Kit Location   
4       SII21058402N     BBTEMP  Bur Block 9 - Temporization           FPC   

       unnamed:_4 unnamed:_5    unnamed:_6    unnamed:_7           unnamed:_8  
0            None       None          None          None                 None  
1            None       None          None          None                 None  
2            None       None          None          None                 None  
3  Kit Times Used    Kit Out  Kit Date Out  Kit Time Out  Kit.Item Cost/Value  
4               0         No          None          None                 2.27  

DMD table preview:
      u

In [54]:
import pandas as pd
import sqlite3
import os

# File locations, relative to the notebook's working directory
clinic_data = os.path.join("data", "ALL_CLINICAL_DATA.xlsx")
sqlite_db = os.path.join("database", "clinical_data.db")


# Read the clinical workbook again, this time taking the column labels from
# 0-indexed row 4 (header=4); rows above it are skipped. The recorded output
# confirms this yields the real labels ("Kit", "Kit Type", ...).
df_clinical = pd.read_excel(clinic_data, header=4)

# Drop the kit check-out columns.
# NOTE(review): the original comment calls these "null data columns"; that is
# not verified in this cell - confirm they carry no information worth keeping.
df_clinical = df_clinical.drop(columns=[
    "Kit Times Used",
    "Kit Out",
    "Kit Date Out",
    "Kit Time Out"
])

# Tidy the remaining labels: trim whitespace and replace literal dots with
# spaces ("Kit.Type Description" -> "Kit Type Description").
# regex=False pins "." to a literal match; without it the result depends on the
# pandas version's default, and pandas 1.x emits a FutureWarning for this call.
df_clinical.columns = (
    df_clinical.columns.str.strip()
              .str.replace(".", " ", regex=False)
)

# Checking the remaining column names
print(df_clinical.columns.tolist())

print(df_clinical)

['Kit', 'Kit Type', 'Kit Type Description', 'Kit Location', 'Kit Item Cost/Value']
               Kit Kit Type            Kit Type Description Kit Location  \
0     SII21058402N   BBTEMP     Bur Block 9 - Temporization          FPC   
1     SII21062844N    TOPER            Tech Kit - Operative          FPC   
2     SII21060903N    CURET                   Curettage Kit          FPC   
3     SII21060903N    CURET                   Curettage Kit          FPC   
4     SII21060904N    CURET                   Curettage Kit          FPC   
...            ...      ...                             ...          ...   
1627   SII21491605   GPR PR                GPR PROS KIT KYC       KYCGPR   
1628   SII21504239     PPFS  PerioPrecision Fixation System        PERIO   
1629   SII21864096    OMSSH           OMS STRAIGHT BIEN AIR         OMFS   
1630  SII21062545N   KYCG7E           KYC G7 EXTRACTION KIT       KYCGPR   
1631   SII21837734   KYCCUT                      KYC Cube T       KYCGPR   

    