In [1]:
# Load necessary packages:
from sqlalchemy import inspect, create_engine, Column, String, Integer, Boolean, BigInteger, Float, text # Database navigation
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import mysql.connector
import sqlite3 # A second option for working with databases
import pandas as pd # Python data manilpulation

In [2]:
# Connecting to the MySQL server 
conn = mysql.connector.connect(
    host="localhost",
    user="root",
    password="DataGathering2025"
)

cursor = conn.cursor()

# Create the database if it doesn't exist
cursor.execute("CREATE DATABASE IF NOT EXISTS BrooksRun")
print("Database created successfully!")

# Select the database
cursor.execute("USE BrooksRun")

Database created successfully!


In [3]:
# Connect to your MySQL database using SQLAlchemy
DATABASE_URL = "mysql+mysqlconnector://root:DataGathering2025@localhost/BrooksRun"
engine = create_engine(DATABASE_URL)

print("Connected to BrooksRun database successfully!")

Connected to BrooksRun database successfully!


In [4]:
# Load the DataFrames into the database as their respective tables
BrooksRun_df = pd.read_csv("BrooksRun.csv")

# Confirm they are DataFrames
BrooksRun_df = pd.DataFrame(BrooksRun_df)

In [11]:
BrooksRun_df.head()

Unnamed: 0,name,gender,price,support,experience,surface,midsole_drop_mm,weight_g,high_arch,medium_arch,flat_arch,customer_id,price_range,customer_support,run_type,arch_type,customer_gender
0,Addiction 14,Male,130.0,Max Support,Cushion,Road,12.0,357.2,False,True,True,1,220,Support,Trail,Medium Arch,Male
1,Adrenaline GTS 20,Male,130.0,Support,Cushion,Road,12.0,300.5,True,True,False,2,213,Support,Trail,High Arch,Female
2,Anthem 3,Male,69.95,Neutral,Cushion,Road,10.0,212.6,True,True,False,3,70,Max Support,Road,Flat Arch,Male
3,Asteria,Male,110.0,Support,Speed,Road,8.0,235.3,False,True,True,4,156,Support,Trail,High Arch,Male
4,Beast 20,Male,160.0,Max Support,Cushion,Road,12.0,331.7,False,True,True,5,165,Support,Road,High Arch,Female


In [5]:
# Establishing a connection to the database
with engine.connect() as connection:
    
    # Creating the BrooksCustomers table
    create_brooks_customers_table = """
    CREATE TABLE IF NOT EXISTS BrooksCustomers (
        customer_id BIGINT AUTO_INCREMENT PRIMARY KEY,
        customer_gender VARCHAR(500)
    );
    """

    # Creating the CustomerPreferences table
    create_customer_preferences_table = """
    CREATE TABLE IF NOT EXISTS CustomerPreferences (
        customer_id BIGINT,
        price_range BIGINT,
        customer_support VARCHAR(500),
        run_type VARCHAR(500),
        arch_type VARCHAR(500),
        FOREIGN KEY (customer_id) REFERENCES BrooksCustomers(customer_id)
    );
    """

    # Creating the BrooksShoes table
    create_brooks_shoes_table = """
    CREATE TABLE IF NOT EXISTS BrooksShoes (
        name VARCHAR(500) PRIMARY KEY,
        price DOUBLE,
        gender VARCHAR(500),
        weight_g INT
    );
    """

    # Executing all table creation queries
    connection.execute(text(create_brooks_customers_table))
    connection.execute(text(create_customer_preferences_table))
    connection.execute(text(create_brooks_shoes_table))

print("All functional-design tables created successfully!")

All functional-design tables created successfully!


In [6]:
# Establishing a connection to the database
with engine.connect() as connection:
    # Iterate through DataFrame rows
    for index, row in BrooksRun_df.iterrows():
        
        # Handle NaN values for customer data
        customer_gender = row['customer_gender'] if pd.notna(row['customer_gender']) else ''  # Replace NaN with empty string
        price_range = row['price_range'] if pd.notna(row['price_range']) else 0  # Replace NaN with default value 0
        customer_support = row['customer_support'] if pd.notna(row['customer_support']) else ''  # Replace NaN with empty string
        run_type = row['run_type'] if pd.notna(row['run_type']) else ''  # Replace NaN with empty string
        arch_type = row['arch_type'] if pd.notna(row['arch_type']) else ''  # Replace NaN with empty string
        
        # Insert data into the 'BrooksCustomers' table
        customer_query = f"""
            INSERT INTO BrooksCustomers (customer_id, customer_gender) 
            VALUES ({row['customer_id']}, '{customer_gender}')
            ON DUPLICATE KEY UPDATE customer_gender = VALUES(customer_gender)
        """
        connection.execute(text(customer_query))
        
        # Now, retrieve the customer_id (assuming gender is unique or customer_id is unique)
        customer_id = row['customer_id']

        # Insert data into the 'CustomerPreferences' table
        customer_pref_query = f"""
            INSERT INTO CustomerPreferences (
                customer_id, price_range, customer_support, run_type, arch_type
            ) VALUES (
                {customer_id}, 
                {price_range}, 
                '{customer_support}',
                '{run_type}',
                '{arch_type}'
            )
        """
        connection.execute(text(customer_pref_query))

        # Handle NaN values for shoe data
        name = row['name'] if pd.notna(row['name']) else ''  # Replace NaN with empty string
        price = row['price'] if pd.notna(row['price']) else 0  # Replace NaN with 0
        gender = row['gender'] if pd.notna(row['gender']) else ''  # Replace NaN with empty string
        weight_g = row['weight_g'] if pd.notna(row['weight_g']) else 0  # Replace NaN with 0
        
        # Insert data into the 'BrooksShoes' table with parameterized query
        shoes_query = """
            INSERT INTO BrooksShoes (
                name, price, gender, weight_g
            ) VALUES (
                :name, :price, :gender, :weight_g
            )
            ON DUPLICATE KEY UPDATE 
                price = VALUES(price),
                gender = VALUES(gender),
                weight_g = VALUES(weight_g)
        """
        
        # Execute the query using the 'text' object and passing parameters as a dictionary
        connection.execute(text(shoes_query), {
            'name': name,
            'price': price,
            'gender': gender,
            'weight_g': weight_g
        })
    
    # Commit the transaction (automatically handled with engine.connect() and execution)
    print("All data inserted into functional tables successfully!")

All data inserted into functional tables successfully!


In [None]:
#Since I finally got it to work this way
#Adding the rest of the columns to these tables


In [7]:
#going to look at the database now
# Create an inspector
inspector = inspect(engine)

# Get all table names
tables = inspector.get_table_names()
print("Tables in the database:", tables)

# Loop through each table and print its columns
for table in tables:
    columns = inspector.get_columns(table)
    print(f"\nColumns in table '{table}':")
    for col in columns:
        print(f" - {col['name']} ({col['type']})")

Tables in the database: ['BrooksCustomers', 'BrooksShoes', 'CustomerPreferences']

Columns in table 'BrooksCustomers':
 - customer_id (BIGINT)
 - customer_gender (VARCHAR(500))

Columns in table 'BrooksShoes':
 - name (VARCHAR(500))
 - price (DOUBLE)
 - gender (VARCHAR(500))
 - weight_g (INTEGER)

Columns in table 'CustomerPreferences':
 - customer_id (BIGINT)
 - price_range (BIGINT)
 - customer_support (VARCHAR(500))
 - run_type (VARCHAR(500))
 - arch_type (VARCHAR(500))
