In [1]:
# create a new db using sqlite 3
import sqlite3

# Create a new database (or connect if it already exists)
conn = sqlite3.connect("Employee_Information.db")

# Create a cursor object
cursor = conn.cursor()

# Create a new table
cursor.execute('''CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, age INTEGER)''')

# Commit changes and close the connection
conn.commit()
conn.close()

In [2]:
# Insert 100 rows of sample data into the users table
import sqlite3
import random

# Sample names to choose from
first_names = ["John", "Jane", "Michael", "Sarah", "David", "Emily", "Chris", "Jessica", "Matthew", "Ashley",
               "Daniel", "Amanda", "Ryan", "Stephanie", "Andrew", "Jennifer", "Joshua", "Rachel", "Brandon", "Megan",
               "Tyler", "Nicole", "Kevin", "Elizabeth", "Justin", "Rebecca", "Jason", "Laura", "Robert", "Samantha",
               "Jonathan", "Amy", "Adam", "Michelle", "Brian", "Kimberly", "James", "Lisa", "Steven", "Angela"]

last_names = ["Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
              "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin",
              "Lee", "Perez", "Thompson", "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
              "Walker", "Young", "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores"]

# Connect to the database
conn = sqlite3.connect("Employee_Information.db")
cursor = conn.cursor()

# Generate and insert 100 rows of sample data
sample_data = []
for i in range(100):
    name = f"{random.choice(first_names)} {random.choice(last_names)}"
    age = random.randint(18, 65)  # Random age between 18 and 65
    sample_data.append((name, age))

# Insert the data
cursor.executemany("INSERT INTO users (name, age) VALUES (?, ?)", sample_data)

# Commit changes
conn.commit()

# Verify the data was inserted by counting rows
cursor.execute("SELECT COUNT(*) FROM users")
row_count = cursor.fetchone()[0]
print(f"Successfully inserted data! Total rows in users table: {row_count}")

# Show a few sample rows
cursor.execute("SELECT * FROM users LIMIT 10")
sample_rows = cursor.fetchall()
print("\nFirst 10 rows:")
for row in sample_rows:
    print(f"ID: {row[0]}, Name: {row[1]}, Age: {row[2]}")

# Close the connection
conn.close()

Successfully inserted data! Total rows in users table: 200

First 10 rows:
ID: 1, Name: John Jackson, Age: 41
ID: 2, Name: Justin Clark, Age: 34
ID: 3, Name: Matthew Garcia, Age: 59
ID: 4, Name: Samantha Lewis, Age: 20
ID: 5, Name: Justin Ramirez, Age: 64
ID: 6, Name: Michelle Thomas, Age: 60
ID: 7, Name: Jason Anderson, Age: 52
ID: 8, Name: Brandon Lewis, Age: 44
ID: 9, Name: Robert Garcia, Age: 46
ID: 10, Name: Andrew Robinson, Age: 33


In [3]:
# Create additional tables for star schema: departments and employee_records (fact table)
import sqlite3
import random
from datetime import datetime, timedelta

# Connect to the database
conn = sqlite3.connect("Employee_Information.db")
cursor = conn.cursor()

# Create departments table (dimension table)
cursor.execute('''CREATE TABLE IF NOT EXISTS departments (
    dept_id INTEGER PRIMARY KEY,
    dept_name TEXT NOT NULL,
    dept_head TEXT,
    location TEXT,
    budget INTEGER
)''')

# Create employee_records table (fact table) - connects users to departments with additional metrics
cursor.execute('''CREATE TABLE IF NOT EXISTS employee_records (
    record_id INTEGER PRIMARY KEY,
    user_id INTEGER,
    dept_id INTEGER,
    hire_date DATE,
    salary INTEGER,
    performance_score REAL,
    hours_worked INTEGER,
    FOREIGN KEY (user_id) REFERENCES users (id),
    FOREIGN KEY (dept_id) REFERENCES departments (dept_id)
)''')

# Insert department data
departments_data = [
    ("Engineering", "Sarah Wilson", "Building A", 500000),
    ("Marketing", "Michael Brown", "Building B", 200000),
    ("Sales", "Jessica Davis", "Building B", 300000),
    ("Human Resources", "David Garcia", "Building C", 150000),
    ("Finance", "Emily Johnson", "Building C", 250000),
    ("Operations", "Chris Martinez", "Building A", 180000),
    ("Customer Support", "Amanda Rodriguez", "Building D", 120000),
    ("Research & Development", "Matthew Lopez", "Building A", 400000)
]

cursor.executemany("INSERT OR IGNORE INTO departments (dept_name, dept_head, location, budget) VALUES (?, ?, ?, ?)", 
                   departments_data)

# Get all user IDs and department IDs to create relationships
cursor.execute("SELECT id FROM users")
user_ids = [row[0] for row in cursor.fetchall()]

cursor.execute("SELECT dept_id FROM departments")
dept_ids = [row[0] for row in cursor.fetchall()]

# Generate employee records (fact table data)
employee_records = []
base_date = datetime(2020, 1, 1)

for user_id in user_ids:
    # Each employee is assigned to one department
    dept_id = random.choice(dept_ids)
    
    # Generate hire date (within last 5 years)
    hire_date = base_date + timedelta(days=random.randint(0, 1825))
    
    # Generate salary based on department (with some variation)
    base_salaries = {1: 85000, 2: 55000, 3: 60000, 4: 65000, 5: 70000, 6: 58000, 7: 45000, 8: 90000}
    base_salary = base_salaries.get(dept_id, 60000)
    salary = base_salary + random.randint(-15000, 25000)
    
    # Generate performance score (1.0 to 5.0)
    performance_score = round(random.uniform(2.0, 5.0), 2)
    
    # Generate hours worked this month (120-200 hours)
    hours_worked = random.randint(120, 200)
    
    employee_records.append((user_id, dept_id, hire_date.strftime('%Y-%m-%d'), 
                           salary, performance_score, hours_worked))

# Insert employee records
cursor.executemany("""INSERT INTO employee_records 
                      (user_id, dept_id, hire_date, salary, performance_score, hours_worked) 
                      VALUES (?, ?, ?, ?, ?, ?)""", employee_records)

# Commit changes
conn.commit()

# Verify the star schema by showing table counts and sample joins
print("=== STAR SCHEMA CREATED ===")
print()

# Show table counts
cursor.execute("SELECT COUNT(*) FROM users")
users_count = cursor.fetchone()[0]
print(f"Users (dimension): {users_count} records")

cursor.execute("SELECT COUNT(*) FROM departments")
dept_count = cursor.fetchone()[0]
print(f"Departments (dimension): {dept_count} records")

cursor.execute("SELECT COUNT(*) FROM employee_records")
records_count = cursor.fetchone()[0]
print(f"Employee Records (fact): {records_count} records")

print("\n=== SAMPLE DEPARTMENTS ===")
cursor.execute("SELECT * FROM departments LIMIT 5")
for row in cursor.fetchall():
    print(f"Dept {row[0]}: {row[1]} | Head: {row[2]} | Location: {row[3]} | Budget: ${row[4]:,}")

print("\n=== SAMPLE STAR SCHEMA JOIN ===")
query = """
SELECT u.name, d.dept_name, er.salary, er.performance_score, er.hire_date
FROM users u
JOIN employee_records er ON u.id = er.user_id
JOIN departments d ON er.dept_id = d.dept_id
LIMIT 10
"""
cursor.execute(query)
print("Employee | Department | Salary | Performance | Hire Date")
print("-" * 60)
for row in cursor.fetchall():
    print(f"{row[0]:<20} | {row[1]:<12} | ${row[2]:,} | {row[3]} | {row[4]}")

# Close connection
conn.close()
print("\nStar schema successfully created!")

=== STAR SCHEMA CREATED ===

Users (dimension): 200 records
Departments (dimension): 16 records
Employee Records (fact): 300 records

=== SAMPLE DEPARTMENTS ===
Dept 1: Engineering | Head: Sarah Wilson | Location: Building A | Budget: $500,000
Dept 2: Marketing | Head: Michael Brown | Location: Building B | Budget: $200,000
Dept 3: Sales | Head: Jessica Davis | Location: Building B | Budget: $300,000
Dept 4: Human Resources | Head: David Garcia | Location: Building C | Budget: $150,000
Dept 5: Finance | Head: Emily Johnson | Location: Building C | Budget: $250,000

=== SAMPLE STAR SCHEMA JOIN ===
Employee | Department | Salary | Performance | Hire Date
------------------------------------------------------------
John Jackson         | Marketing    | $62,655 | 4.32 | 2024-05-27
Justin Clark         | Engineering  | $74,797 | 4.65 | 2023-06-04
Matthew Garcia       | Marketing    | $43,761 | 4.55 | 2024-08-12
Samantha Lewis       | Finance      | $92,330 | 2.2 | 2022-12-14
Justin Ramirez 

In [4]:
# Display the complete star schema structure
import sqlite3

# Connect to the database
conn = sqlite3.connect("Employee_Information.db")
cursor = conn.cursor()

def print_table_schema(table_name):
    """Print detailed schema information for a table"""
    cursor.execute(f"PRAGMA table_info({table_name})")
    columns = cursor.fetchall()
    
    print(f"\n TABLE: {table_name.upper()}")
    print("=" * 50)
    print("Column Name      | Type      | Null | Default | Primary Key")
    print("-" * 50)
    
    for col in columns:
        cid, name, data_type, not_null, default_value, pk = col
        null_ok = "NO" if not_null else "YES"
        default_str = str(default_value) if default_value else "None"
        pk_str = "YES" if pk else "NO"
        print(f"{name:<16} | {data_type:<9} | {null_ok:<4} | {default_str:<7} | {pk_str}")

def print_foreign_keys(table_name):
    """Print foreign key relationships for a table"""
    cursor.execute(f"PRAGMA foreign_key_list({table_name})")
    foreign_keys = cursor.fetchall()
    
    if foreign_keys:
        print(f"\n FOREIGN KEYS for {table_name}:")
        for fk in foreign_keys:
            print(f"   {fk[3]} -> {fk[2]}.{fk[4]}")

def print_table_sample_data(table_name, limit=5):
    """Print sample data from a table"""
    cursor.execute(f"SELECT * FROM {table_name} LIMIT {limit}")
    rows = cursor.fetchall()
    
    if rows:
        print(f"\n SAMPLE DATA from {table_name} (first {limit} rows):")
        cursor.execute(f"PRAGMA table_info({table_name})")
        columns = [col[1] for col in cursor.fetchall()]
        
        # Print header
        header = " | ".join([col[:12] for col in columns])
        print(header)
        print("-" * len(header))
        
        # Print data rows
        for row in rows:
            formatted_row = []
            for item in row:
                if isinstance(item, str) and len(item) > 12:
                    formatted_row.append(item[:10] + "..")
                else:
                    formatted_row.append(str(item)[:12])
            print(" | ".join(formatted_row))

print(" COMPLETE STAR SCHEMA STRUCTURE")
print("=" * 60)

# Get all tables in the database
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = [row[0] for row in cursor.fetchall()]

print(f"\n DATABASE: Employee_Information.db")
print(f"Total Tables: {len(tables)}")
print(f"Tables: {', '.join(tables)}")

# Print schema for each table
for table in tables:
    print_table_schema(table)
    print_foreign_keys(table)
    print_table_sample_data(table)
    print()

# Show the star schema relationships visually
print("\n STAR SCHEMA RELATIONSHIP DIAGRAM")
print("=" * 50)
print("""
                    ┌─────────────────┐
                    │   DEPARTMENTS   │ ◄─── Dimension Table
                    │   (8 records)   │
                    │                 │
                    │ • dept_id (PK)  │
                    │ • dept_name     │
                    │ • dept_head     │
                    │ • location      │
                    │ • budget        │
                    └─────────┬───────┘
                              │
                              │ 1:M relationship
                              ▼
                    ┌─────────────────┐
                    │ EMPLOYEE_RECORDS│ ◄─── Fact Table
                    │  (100 records)  │      (Central Hub)
                    │                 │
                    │ • record_id(PK) │
                    │ • user_id (FK)  │ ──────┐
                    │ • dept_id (FK)  │       │
                    │ • hire_date     │       │
                    │ • salary        │       │ M:1 relationship
                    │ • performance   │       │
                    │ • hours_worked  │       ▼
                    └─────────────────┘ ┌─────────────────┐
                                        │     USERS       │ ◄─── Dimension Table
                                        │  (100 records)  │
                                        │                 │
                                        │ • id (PK)       │
                                        │ • name          │
                                        │ • age           │
                                        └─────────────────┘
""")

# Show key business metrics available through the star schema
print("\n KEY BUSINESS METRICS AVAILABLE:")
print("-" * 40)

# Department-wise employee count
cursor.execute("""
SELECT d.dept_name, COUNT(er.user_id) as employee_count
FROM departments d
LEFT JOIN employee_records er ON d.dept_id = er.dept_id
GROUP BY d.dept_name
ORDER BY employee_count DESC
""")

print("\n EMPLOYEES BY DEPARTMENT:")
for row in cursor.fetchall():
    print(f"   {row[0]:<20}: {row[1]} employees")

# Average salary by department
cursor.execute("""
SELECT d.dept_name, ROUND(AVG(er.salary), 2) as avg_salary
FROM departments d
JOIN employee_records er ON d.dept_id = er.dept_id
GROUP BY d.dept_name
ORDER BY avg_salary DESC
""")

print("\n AVERAGE SALARY BY DEPARTMENT:")
for row in cursor.fetchall():
    print(f"   {row[0]:<20}: ${row[1]:,}")

# Performance metrics
cursor.execute("""
SELECT 
    COUNT(*) as total_employees,
    ROUND(AVG(salary), 2) as avg_salary,
    ROUND(AVG(performance_score), 2) as avg_performance,
    ROUND(AVG(hours_worked), 2) as avg_hours
FROM employee_records
""")

metrics = cursor.fetchone()
print(f"\n OVERALL COMPANY METRICS:")
print(f"   Total Employees: {metrics[0]}")
print(f"   Average Salary: ${metrics[1]:,}")
print(f"   Average Performance: {metrics[2]}/5.0")
print(f"   Average Hours/Month: {metrics[3]}")

conn.close()
print("\n Schema analysis complete!")

 COMPLETE STAR SCHEMA STRUCTURE

 DATABASE: Employee_Information.db
Total Tables: 3
Tables: users, departments, employee_records

 TABLE: USERS
Column Name      | Type      | Null | Default | Primary Key
--------------------------------------------------
id               | INTEGER   | YES  | None    | YES
name             | TEXT      | YES  | None    | NO
age              | INTEGER   | YES  | None    | NO

 SAMPLE DATA from users (first 5 rows):
id | name | age
---------------
1 | John Jackson | 41
2 | Justin Clark | 34
3 | Matthew Ga.. | 59
4 | Samantha L.. | 20
5 | Justin Ram.. | 64


 TABLE: DEPARTMENTS
Column Name      | Type      | Null | Default | Primary Key
--------------------------------------------------
dept_id          | INTEGER   | YES  | None    | YES
dept_name        | TEXT      | NO   | None    | NO
dept_head        | TEXT      | YES  | None    | NO
location         | TEXT      | YES  | None    | NO
budget           | INTEGER   | YES  | None    | NO

 SAMPLE DATA from 