In [1]:
## Generate a QuickDBD table definition (schema text) from a pandas DataFrame

import pandas as pd

In [2]:
import re

def generate_quickdbd(df, table_name):
    """Build a QuickDBD table definition from a DataFrame's columns and dtypes.

    Args:
        df: pandas DataFrame whose column labels and dtypes describe the table.
        table_name: Name written on the first line of the definition.

    Returns:
        A newline-joined string: the table name, a ``-`` separator line, then
        one indented ``<column> [PK] <sql_type>`` line per column. The first
        column is marked as the primary key.
    """
    # Map pandas dtype names to SQL type names; anything unlisted becomes varchar.
    dtype_dict = {
        'object': 'varchar',
        'int64': 'int',
        'float64': 'float',
        'datetime64[ns]': 'datetime',
        'bool': 'boolean',
    }

    output = [f"{table_name}\n-"]
    # Series.iteritems() was removed in pandas 2.0; items() is the supported spelling.
    for i, (column, dtype) in enumerate(df.dtypes.items()):
        # Column labels are not guaranteed to be strings (e.g. integer headers),
        # so coerce before cleaning. Spaces and every other non-word character
        # become underscores, and a name starting with a digit gets a leading
        # underscore (the zero-width `^(?=\d)` alternative).
        cleaned_column = re.sub(r'\W|^(?=\d)', '_', str(column).replace(" ", "_"))

        sql_dtype = dtype_dict.get(str(dtype), 'varchar')

        # Assume the first column is the primary key (PK).
        if i == 0:
            output.append(f"  {cleaned_column} PK {sql_dtype}")
        else:
            output.append(f"  {cleaned_column} {sql_dtype}")

    return "\n".join(output)


In [3]:
## Load the DataFrame
# Read the team-stats CSV; the path is relative, so it resolves against the current working directory.
df = pd.read_csv("MHSAA_team_stats_Thu_Jul_22_2021_11-30.csv")


# Print column names, non-null counts and dtypes; generate_quickdbd reads these dtypes to pick SQL types.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 602 entries, 0 to 601
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   primary_team           602 non-null    object 
 1   games                  584 non-null    float64
 2   home_games             599 non-null    float64
 3   away_games             587 non-null    float64
 4   wins                   584 non-null    float64
 5   home_wins              599 non-null    float64
 6   away_wins              587 non-null    float64
 7   losses                 584 non-null    float64
 8   home_losses            599 non-null    float64
 9   away_losses            587 non-null    float64
 10  ties                   584 non-null    float64
 11  home_ties              599 non-null    float64
 12  away_ties              587 non-null    float64
 13  overall_win_pct        584 non-null    float64
 14  home_win_pct           599 non-null    float64
 15  away_w

In [4]:
# Run and print the QuickDBD definition for the loaded DataFrame
# Alternative small in-memory example: df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(generate_quickdbd(df, "team_yearly_stats_example"))


team_yearly_stats_example
-
  primary_team PK varchar
  games float
  home_games float
  away_games float
  wins float
  home_wins float
  away_wins float
  losses float
  home_losses float
  away_losses float
  ties float
  home_ties float
  away_ties float
  overall_win_pct float
  home_win_pct float
  away_win_pct float
  home_avg_runs_scored float
  away_avg_runs_scored float
  runs_scored_change float
  home_avg_runs_allowed float
  away_avg_runs_allowed float
  runs_allowed_change float
  home_run_diff float
  away_run_diff float
  run_diff_change float
