In [39]:
## Function to convert a DataFrame's columns and dtypes into a QuickDBD table definition

import pandas as pd

In [40]:
import re

def generate_quickdbd(df, table_name):
    """Render a DataFrame's columns as a QuickDBD-style table definition.

    The result is the table name, a ``-`` separator line, then one
    two-space-indented line per column of the form ``<name> <type>``.
    The first column is marked ``PK`` (this is an assumption, not something
    checked against the data).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels and dtypes are described.
    table_name : str
        Name emitted verbatim as the first line of the definition.

    Returns
    -------
    str
        The newline-joined table definition.
    """
    # Pandas dtype name -> type keyword; anything unlisted falls back to 'varchar'.
    dtype_dict = { 'object': 'varchar', 'int64': 'int', 'float64': 'float', 'datetime64[ns]': 'datetime', 'bool': 'boolean' }

    output = [f"{table_name}\n-"]
    # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
    for i, (column, dtype) in enumerate(df.dtypes.items()):
        # Column labels are not guaranteed to be strings (e.g. integer labels),
        # so coerce before cleaning. Spaces and every other non-word character
        # become underscores, and a leading digit gets an underscore prefix.
        cleaned_column = re.sub(r'\W|^(?=\d)', '_', str(column).replace(" ", "_"))

        # Map pandas dtypes to SQL dtypes
        sql_dtype = dtype_dict.get(str(dtype), 'varchar')

        # Assume the first column is the primary key (PK)
        if i == 0:
            output.append(f"  {cleaned_column} PK {sql_dtype}")
        else:
            output.append(f"  {cleaned_column} {sql_dtype}")

    return "\n".join(output)


In [41]:
## Load the DF
# Read the transformed CSV. Forward slashes are used because the previous
# backslash-separated literal relied on invalid escape sequences (\c, \p) in a
# non-raw string and only resolved on Windows; this form works on both
# Windows and POSIX systems.
df = pd.read_csv("TEMP/clean_tables/public_school_data_transformed.csv")

# Summarize column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1314 entries, 0 to 1313
Data columns (total 36 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   school_name                       1314 non-null   object 
 1   school_id                         1314 non-null   float64
 2   school_district                   1314 non-null   object 
 3   school_district_id                1314 non-null   int64  
 4   total_students                    1314 non-null   int64  
 5   teachers                          1314 non-null   float64
 6   student_teacher_ratio             1314 non-null   object 
 7   lunch_free_count                  1314 non-null   object 
 8   lunch_reduced_count               1314 non-null   object 
 9   lunch_total_count                 1314 non-null   object 
 10  9                                 974 non-null    float64
 11  10                                972 non-null    float64
 12  11    

In [42]:
# Build the QuickDBD definition for the loaded frame and print it.
# Small stand-in frame for a quick manual check:
#   df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(generate_quickdbd(df=df, table_name="public school data"))


public school data
-
  school_name PK varchar
  school_id float
  school_district varchar
  school_district_id int
  total_students int
  teachers float
  student_teacher_ratio varchar
  lunch_free_count varchar
  lunch_reduced_count varchar
  lunch_total_count varchar
  _9 float
  _10 float
  _11 float
  _12 float
  American_Indian_Alaska_Native float
  Asian float
  Black float
  Hispanic float
  Native_Hawaiian_Pacific_Islander float
  White float
  Two_or_MoreRaces float
  Male int
  Female int
  Male_Pct varchar
  Female_Pct varchar
  KG float
  _1 float
  _2 float
  _3 float
  _4 float
  _5 float
  _6 float
  _7 float
  _8 float
  PK varchar
  Ungraded float
