In [1]:
# Import necessary libraries
import sqlite3
import pandas as pd

# Open the bookstore database file (created if it does not exist yet) and
# grab a cursor for running statements against it
db_file = 'bookstore.db'
conn = sqlite3.connect(db_file)
cursor = conn.cursor()

In [2]:
# Build the denormalized source table 'unnormalized_books': one row per book,
# with the author and publisher details repeated on every row.
#
# 'bookstore.db' persists on disk between kernel sessions, so drop any copy
# left by a previous run first. Without this, re-running the notebook fails
# with "table unnormalized_books already exists".
cursor.execute('DROP TABLE IF EXISTS unnormalized_books')
cursor.execute('''
CREATE TABLE unnormalized_books (
    id INTEGER PRIMARY KEY,
    title TEXT,
    author_name TEXT,
    author_email TEXT,
    genre TEXT,
    price REAL,
    publisher TEXT,
    publisher_contact TEXT
)
''')

# Sample data (assume repetitive author and publisher info).
# Tuple layout: (title, author_name, author_email, genre, price, publisher, publisher_contact)
data = [
    ('The Great Escape', 'John Doe', 'john@example.com', 'Adventure', 10.99, 'Alpha Publications', 'contact@alpha.com'),
    ('Mystery Lane', 'Jane Smith', 'jane@example.com', 'Mystery', 12.99, 'Beta Books', 'contact@beta.com'),
    ('Adventure in Space', 'John Doe', 'john@example.com', 'Sci-fi', 15.99, 'Alpha Publications', 'contact@alpha.com'),
    ('Ocean Blues', 'Sarah Connor', 'sarah@example.com', 'Romance', 7.99, 'Gamma Publishing', 'contact@gamma.com'),
    ('Lost in Dreams', 'Mike Tyson', 'mike@example.com', 'Fantasy', 9.99, 'Delta Literature', 'contact@delta.com'),
    ('Sunset Stories', 'Lucy Loo', 'lucy@example.com', 'Drama', 8.99, 'Epsilon Editions', 'contact@epsilon.com'),
    ('Moonlit Secrets', 'Brian Bryson', 'brian@example.com', 'Mystery', 11.99, 'Zeta Publishing', 'contact@zeta.com'),
    ('Starstruck', 'Anna Smith', 'anna@example.com', 'Romance', 7.99, 'Beta Books', 'contact@beta.com'),
    ('Dungeon Dive', 'Tim Track', 'tim@example.com', 'Adventure', 13.99, 'Alpha Publications', 'contact@alpha.com'),
    ('Hopes and Dreams', 'Liam Neeson', 'liam@example.com', 'Drama', 6.99, 'Theta Print', 'contact@theta.com'),
    ('Enchanted Realms', 'Emily Em', 'emily@example.com', 'Fantasy', 14.99, 'Iota Inc', 'contact@iota.com'),
    ('Twisted Fate', 'Oscar Orange', 'oscar@example.com', 'Thriller', 9.99, 'Kappa Publishing', 'contact@kappa.com'),
    ('Galactic War', 'Jenny June', 'jenny@example.com', 'Sci-fi', 15.49, 'Lambda Literature', 'contact@lambda.com'),
    ('Whispering Woods', 'Tom Thumb', 'tom@example.com', 'Adventure', 8.49, 'Mu Magazines', 'contact@mu.com'),
    ('Deserted Destinies', 'Penny Pen', 'penny@example.com', 'Drama', 10.49, 'Nu Novels', 'contact@nu.com'),
    ('Cursed Kingdoms', 'Ronny Ron', 'ronny@example.com', 'Fantasy', 12.49, 'Xi Xerox', 'contact@xi.com'),
    ('Hunted Houses', 'Samantha Sam', 'samantha@example.com', 'Horror', 7.99, 'Omicron Oasis', 'contact@omicron.com'),
    ('Timeless Tales', 'Danny Day', 'danny@example.com', 'Drama', 6.49, 'Pi Press', 'contact@pi.com'),
    ('Endless Embers', 'Wendy Wind', 'wendy@example.com', 'Romance', 7.49, 'Rho Reads', 'contact@rho.com'),
    ('Frozen Fears', 'Vince Vortex', 'vince@example.com', 'Thriller', 8.49, 'Sigma Stories', 'contact@sigma.com')
]

# Name the target columns explicitly so the insert does not depend on the
# table's column order; id is left out so SQLite assigns it automatically.
cursor.executemany(
    '''
    INSERT INTO unnormalized_books
        (title, author_name, author_email, genre, price, publisher, publisher_contact)
    VALUES (?, ?, ?, ?, ?, ?, ?)
    ''',
    data,
)
conn.commit()


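### Exercise
- Normalize the `unnormalized_books` table: move the repeated author and publisher details into their own tables and reference them from a `books` table.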

In [3]:
# Normalize unnormalized_books into three tables: authors, publishers, books.

# Start from a clean slate so this cell can be re-run against the persistent
# database file. books is dropped before the tables it references. emails
# (derived from authors in a later cell) is dropped as well, because its
# author_ids would otherwise refer to rows of the rebuilt authors table.
for table_name in ('emails', 'books', 'authors', 'publishers'):
    cursor.execute('DROP TABLE IF EXISTS ' + table_name)

# Create authors table
cursor.execute('''
CREATE TABLE authors (
    author_id INTEGER PRIMARY KEY,
    name TEXT,
    email TEXT
)
''')

# Insert unique authors; an author is identified by the (name, email) pair
cursor.execute('''
INSERT INTO authors (name, email)
SELECT DISTINCT author_name, author_email
FROM unnormalized_books
''')
conn.commit()

# Create publishers table
cursor.execute('''
CREATE TABLE publishers (
    publisher_id INTEGER PRIMARY KEY,
    name TEXT,
    contact TEXT
)
''')

# Insert unique publishers; a publisher is identified by the (name, contact) pair
cursor.execute('''
INSERT INTO publishers (name, contact)
SELECT DISTINCT publisher, publisher_contact
FROM unnormalized_books
''')
conn.commit()

# Create a normalized books table
cursor.execute('''
CREATE TABLE books (
    book_id INTEGER PRIMARY KEY,
    title TEXT,
    author_id INTEGER,
    genre TEXT,
    price REAL,
    publisher_id INTEGER,
    FOREIGN KEY(author_id) REFERENCES authors(author_id),
    FOREIGN KEY(publisher_id) REFERENCES publishers(publisher_id)
)
''')

# Populate the normalized books table.
# The lookups match on the same column pairs that were used to deduplicate
# authors and publishers. Matching on the name alone would pick an arbitrary
# id whenever two authors (or publishers) share a name but differ in
# email (or contact). IS is used instead of = so that NULL values, which
# DISTINCT groups together, still find their row.
cursor.execute('''
INSERT INTO books (title, author_id, genre, price, publisher_id)
SELECT
    ub.title,
    a.author_id,
    ub.genre,
    ub.price,
    p.publisher_id
FROM unnormalized_books AS ub
JOIN authors AS a
    ON a.name IS ub.author_name AND a.email IS ub.author_email
JOIN publishers AS p
    ON p.name IS ub.publisher AND p.contact IS ub.publisher_contact
ORDER BY ub.id
''')
conn.commit()

# Sanity check: normalization must neither lose nor duplicate books
n_source_rows = cursor.execute('SELECT COUNT(*) FROM unnormalized_books').fetchone()[0]
n_book_rows = cursor.execute('SELECT COUNT(*) FROM books').fetchone()[0]
assert n_book_rows == n_source_rows, 'books row count differs from unnormalized_books'

In [4]:
# Query 1: titles of every book whose genre is 'Sci-fi'
sci_fi_sql = '''
SELECT title FROM books WHERE genre = 'Sci-fi'
'''
df_sci_fi = pd.read_sql_query(sql=sci_fi_sql, con=conn)

# Query 2: titles of the books whose publisher is 'Alpha Publications'
# (books is joined to publishers to filter on the publisher's name)
alpha_books_sql = '''
SELECT title 
FROM books 
JOIN publishers ON books.publisher_id = publishers.publisher_id
WHERE publishers.name = 'Alpha Publications'
'''
df_alpha_books = pd.read_sql_query(sql=alpha_books_sql, con=conn)

# Statement 3: change the stored email of the author named 'John Doe',
# then persist the change
update_email_sql = '''
UPDATE authors
SET email = 'john.doe@newmail.com'
WHERE name = 'John Doe'
'''
cursor.execute(update_email_sql)
conn.commit()


## Last task: move author emails into a separate `emails` table

In [6]:
# Move author emails out of authors into a dedicated emails table, so that an
# author can own several addresses distinguished by email_type.

# Create emails table linked to authors
cursor.execute('''
CREATE TABLE emails (
    email_id INTEGER PRIMARY KEY,
    author_id INTEGER,
    email_address TEXT,
    email_type TEXT,
    FOREIGN KEY(author_id) REFERENCES authors(author_id)
)
''')

# Populate emails with the primary emails from the authors table
cursor.execute('''
INSERT INTO emails (author_id, email_address, email_type)
SELECT
    author_id,
    email,
    'Primary'
FROM authors
''')
conn.commit()

# Rebuild authors without the email column.
# The replacement is built under a temporary name and swapped in only after
# the old table has been dropped. Renaming the *old* table out of the way
# first (authors -> authors_old) is unsafe: since SQLite 3.26 a rename also
# rewrites the REFERENCES clauses of other tables, so books and emails would
# end up referencing authors_old, which is then dropped.
cursor.execute('''
CREATE TABLE authors_new (
    author_id INTEGER PRIMARY KEY,
    name TEXT
)
''')

# Copy the ids explicitly so existing author_id references stay valid
cursor.execute('''
INSERT INTO authors_new (author_id, name)
SELECT
    author_id,
    name
FROM authors
''')

cursor.execute('DROP TABLE authors')
cursor.execute('ALTER TABLE authors_new RENAME TO authors')
conn.commit()

# Add an 'AlphaPub' address for every author who has a book published by
# 'Alpha Publications': the local part of the primary address + '@alphapubs.com'.
# substr() positions are 1-based. A start of 0 makes SQLite return one
# character fewer than requested, which turned 'tim@example.com' into
# 'ti@alphapubs.com'.
cursor.execute('''
INSERT INTO emails (author_id, email_address, email_type)
SELECT
    DISTINCT author_id,
    substr(email_address, 1, instr(email_address, '@') - 1) || '@alphapubs.com',
    'AlphaPub'
FROM emails
WHERE author_id IN (
    SELECT DISTINCT author_id
    FROM books
    JOIN publishers ON books.publisher_id = publishers.publisher_id
    WHERE publishers.name = 'Alpha Publications'
) AND email_type = 'Primary'
''')
conn.commit()

# Inspect
df_emails = pd.read_sql_query('SELECT * FROM emails', conn)
print(df_emails)



    email_id  author_id          email_address email_type
0          1          1   john.doe@newmail.com    Primary
1          2          2       jane@example.com    Primary
2          3          3      sarah@example.com    Primary
3          4          4       mike@example.com    Primary
4          5          5       lucy@example.com    Primary
5          6          6      brian@example.com    Primary
6          7          7       anna@example.com    Primary
7          8          8        tim@example.com    Primary
8          9          9       liam@example.com    Primary
9         10         10      emily@example.com    Primary
10        11         11      oscar@example.com    Primary
11        12         12      jenny@example.com    Primary
12        13         13        tom@example.com    Primary
13        14         14      penny@example.com    Primary
14        15         15      ronny@example.com    Primary
15        16         16   samantha@example.com    Primary
16        17  