<div style="background-color:#f4f8ff; padding:16px; border-left:6px solid #1f4fd8; border-radius:6px; color:#000;">

<h2 style="color:#000; margin-top:0;">Database Exploration</h2>

<h4 style="color:#000;">Purpose</h4>
<ul>
  <li>Explore the overall structure of the SQL Server database.</li>
  <li>Identify available schemas and tables for analysis.</li>
  <li>Inspect column-level metadata such as data types and nullability.</li>
</ul>

<h4 style="color:#000;">Tables Used</h4>
<ul>
  <li><b>INFORMATION_SCHEMA.TABLES</b><br>
      Used to list all tables along with their respective schemas.</li>
  <li><b>INFORMATION_SCHEMA.COLUMNS</b><br>
      Used to examine column names, data types, and constraints for selected tables.</li>
</ul>

<h4 style="color:#000;">Outcome</h4>
<ul>
  <li>Clear understanding of the database layout.</li>
  <li>Identification of analytical tables, primarily from the Gold layer.</li>
  <li>Strong foundation for accurate and efficient Exploratory Data Analysis (EDA).</li>
</ul>

</div>


<div style="background-color:#f4f8ff; padding:10px; border-left:6px solid #1f4fd8; border-radius:6px; color:#000; font-size:20px;">
<b>1. Retrieve a list of all tables in the database</b>
</div>


In [3]:
query = """
SELECT 
    TABLE_CATALOG, 
    TABLE_SCHEMA, 
    TABLE_NAME, 
    TABLE_TYPE
FROM INFORMATION_SCHEMA.TABLES;
"""

df = pd.read_sql(query, engine)
display(HTML(df.to_html(index=False)))

TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,TABLE_TYPE
DataWarehouse,bronze,crm_cust_info,BASE TABLE
DataWarehouse,bronze,crm_prd_info,BASE TABLE
DataWarehouse,bronze,crm_sales_details,BASE TABLE
DataWarehouse,bronze,erp_loc_a101,BASE TABLE
DataWarehouse,bronze,erp_cust_az12,BASE TABLE
DataWarehouse,bronze,erp_px_cat_g1v2,BASE TABLE
DataWarehouse,silver,crm_cust_info,BASE TABLE
DataWarehouse,silver,crm_prd_info,BASE TABLE
DataWarehouse,silver,crm_sales_details,BASE TABLE
DataWarehouse,silver,erp_loc_a101,BASE TABLE


<div style="background-color:#f4f8ff; padding:10px; border-left:6px solid #1f4fd8; border-radius:6px; color:#000; font-size:20px;">
<strong>2. Retrieve all columns for (dim_customers)</strong>
</div>


In [4]:
query = """
SELECT 
    COLUMN_NAME, 
    DATA_TYPE, 
    IS_NULLABLE, 
    CHARACTER_MAXIMUM_LENGTH
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'dim_customers';
"""

df = pd.read_sql(query, engine)
display(HTML(df.to_html(index=False)))

COLUMN_NAME,DATA_TYPE,IS_NULLABLE,CHARACTER_MAXIMUM_LENGTH
customer_key,bigint,YES,
customer_id,int,YES,
customer_number,nvarchar,YES,50.0
first_name,nvarchar,YES,50.0
last_name,nvarchar,YES,50.0
country,nvarchar,YES,50.0
marital_status,nvarchar,YES,50.0
gender,nvarchar,YES,50.0
birthdate,date,YES,
create_date,date,YES,


<div style="background-color:#f4f8ff; padding:10px; border-left:6px solid #1f4fd8; border-radius:6px; color:#000; font-size:20px;">
<strong>3. Retrieve all columns for (dim_products)</strong>
</div>

In [5]:
query = """
SELECT 
    COLUMN_NAME, 
    DATA_TYPE, 
    IS_NULLABLE, 
    CHARACTER_MAXIMUM_LENGTH
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'dim_products';
"""

df = pd.read_sql(query, engine)
display(HTML(df.to_html(index=False)))

COLUMN_NAME,DATA_TYPE,IS_NULLABLE,CHARACTER_MAXIMUM_LENGTH
product_key,bigint,YES,
product_id,int,YES,
product_number,nvarchar,YES,50.0
product_name,nvarchar,YES,50.0
category_id,nvarchar,YES,50.0
category,nvarchar,YES,50.0
subcategory,nvarchar,YES,50.0
maintenance,nvarchar,YES,50.0
cost,int,YES,
product_line,nvarchar,YES,50.0


<div style="background-color:#f4f8ff; padding:10px; border-left:6px solid #1f4fd8; border-radius:6px; color:#000; font-size:20px;">
<strong>4. Retrieve all columns for (fact_sales)</strong>
</div>

In [6]:
query = """
SELECT 
    COLUMN_NAME, 
    DATA_TYPE, 
    IS_NULLABLE, 
    CHARACTER_MAXIMUM_LENGTH
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'fact_sales';
"""

df = pd.read_sql(query, engine)
df

Unnamed: 0,COLUMN_NAME,DATA_TYPE,IS_NULLABLE,CHARACTER_MAXIMUM_LENGTH
0,order_number,nvarchar,YES,50.0
1,product_key,bigint,YES,
2,customer_key,bigint,YES,
3,order_date,date,YES,
4,shipping_date,date,YES,
5,due_date,date,YES,
6,sales_amount,int,YES,
7,quantity,int,YES,
8,price,int,YES,
