In [43]:
## Function to take a dataframe and convert it to a SQL table for QuickDBD

import pandas as pd

In [44]:
import re

def generate_quickdbd(df, table_name):
    """Render a DataFrame's columns as a QuickDBD table definition.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels and dtypes describe the table. Only
        ``df.dtypes`` is read; the rows themselves are never inspected.
    table_name : str
        Name written on the first line of the definition.

    Returns
    -------
    str
        ``table_name``, a ``-`` separator line, then one two-space-indented
        line per column in the form ``<name> [PK] <sql type>``. The first
        column is always tagged ``PK``; this is an assumption, not something
        checked against the data.

    Notes
    -----
    Column labels are sanitized: every non-word character (spaces included)
    becomes ``_`` and a label starting with a digit gets a leading ``_``.
    Dtypes that match neither the exact-name table nor the kind-code table
    are emitted as ``varchar``.
    """
    # Exact dtype-name lookups, tried first.
    dtype_dict = {
        'object': 'varchar',
        'int64': 'int',
        'float64': 'float',
        'datetime64[ns]': 'datetime',
        'bool': 'boolean',
    }
    # Fallback keyed on the dtype's one-letter ``kind`` code, so other widths
    # and variants (int32, float32, unsigned ints, tz-aware datetimes, ...)
    # are not silently reported as varchar.
    kind_dict = {'b': 'boolean', 'i': 'int', 'u': 'int', 'f': 'float', 'M': 'datetime'}

    output = [f"{table_name}\n-"]
    # Series.iteritems() was removed in pandas 2.0; items() is the supported spelling.
    for i, (column, dtype) in enumerate(df.dtypes.items()):
        # str() keeps non-string labels (e.g. integer column headers) from crashing.
        # Raw string so that \W and \d reach the regex engine as written.
        cleaned_column = re.sub(r'\W|^(?=\d)', '_', str(column).replace(" ", "_"))

        # Map pandas dtypes to SQL dtypes: exact name, then kind code, then varchar.
        sql_dtype = dtype_dict.get(str(dtype))
        if sql_dtype is None:
            sql_dtype = kind_dict.get(getattr(dtype, 'kind', None), 'varchar')

        # Assume the first column is the primary key (PK)
        if i == 0:
            output.append(f"  {cleaned_column} PK {sql_dtype}")
        else:
            output.append(f"  {cleaned_column} {sql_dtype}")

    return "\n".join(output)


In [45]:
## Load the DF
# Read the lookup CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="data/2023_mhsaa_POST_LOOKUP2.csv")

# Print the column names, non-null counts and dtypes of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 494 entries, 0 to 493
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   park_name                 494 non-null    object 
 1   foul                      494 non-null    object 
 2   fop                       494 non-null    object 
 3   notes                     23 non-null     object 
 4   home_plate                494 non-null    object 
 5   foul_area_sqft            494 non-null    float64
 6   fop_area_sqft             494 non-null    float64
 7   field_area_sqft           494 non-null    float64
 8   foul_area_per             494 non-null    float64
 9   fair_to_foul              494 non-null    float64
 10  distances                 494 non-null    object 
 11  max_distance              494 non-null    float64
 12  min_distance              494 non-null    float64
 13  avg_distance              494 non-null    float64
 14  median_dis

In [46]:
# Build the QuickDBD definition for the loaded frame and print it.
# For a quick check without the CSV, substitute a small frame such as:
# df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(generate_quickdbd(df=df, table_name="ballpark_table"))


ballpark_table
-
  park_name PK varchar
  foul varchar
  fop varchar
  notes varchar
  home_plate varchar
  foul_area_sqft float
  fop_area_sqft float
  field_area_sqft float
  foul_area_per float
  fair_to_foul float
  distances varchar
  max_distance float
  min_distance float
  avg_distance float
  median_distance float
  num_distances int
  max_distance_rank float
  min_distance_rank float
  avg_distance_rank float
  median_distance_rank float
  field_area_rank float
  foul_area_rank float
  fop_area_per_rank float
  ratio_rank float
  fop_centroid varchar
  field_orientation float
  field_cardinal_direction varchar
