In [1]:
import sqlite3
import pandas as pd
import matplotlib as mpl
import csv

In [2]:
# Open (or create) the SQLite database file that backs this notebook.
DATABASE_FILE = 'purses.db'
conn = sqlite3.connect(DATABASE_FILE)

# `click` is the cursor the later cells use to run their SQL statements.
click = conn.cursor()

# Establish Customer Table


In [3]:
# Drop any existing table first so that Kernel > 'Restart & Run All' always
# starts this cell from a clean slate.
click.execute('''DROP TABLE IF EXISTS customer''')

# Create the customer table.
# NOTE(review): zip_code has INTEGER affinity, so a ZIP such as '02134' would be
# stored as 2134 (leading zero lost) -- consider TEXT if the data has such ZIPs.
click.execute('''CREATE TABLE customer (
    customer_id INTEGER PRIMARY KEY,
    first VARCHAR(25) NOT NULL,
    last VARCHAR(25) NOT NULL,
    email VARCHAR(75) NOT NULL,
    street VARCHAR(25) NOT NULL,
    city VARCHAR(50) NOT NULL,
    state VARCHAR(15) NOT NULL,
    zip_code INTEGER NOT NULL,
    birthdate DATE NOT NULL,
    payment REAL NOT NULL
)''')

# CSV header names, in the same order as the columns of the customer table.
customer_columns = (
    'customer_id', 'first', 'last', 'email', 'street',
    'city', 'state', 'zip_code', 'birthdate', 'payment',
)

# A 'with-block' means the file automatically closes.
# newline='' is what the csv module asks for, so that line endings embedded in
# quoted fields are handed to the parser untouched.
with open('customer.csv', 'r', newline='') as f:
    # DictReader keys every row by the CSV header row, so each value can be
    # looked up by its column name.
    customer_sheet = csv.DictReader(f)

    # One tuple per CSV row, values ordered to match customer_columns.
    # A header missing from the file raises KeyError here.
    data_customer = [
        tuple(row[column] for column in customer_columns)
        for row in customer_sheet
    ]


    

# Insert Data for Customer Table

In [4]:
# Parameterised INSERT for the customer table: one "?" placeholder per column,
# to be filled from each parsed csv row when the statement is executed.
customer_insertion = (
    "INSERT INTO customer "
    "(customer_id, first, last, email, street, city, state, zip_code, birthdate, payment) "
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
)


In [5]:
# Insert every row parsed from the csv into the customer table.
# Using the connection as a context manager commits when the block succeeds and
# rolls back if any row fails, so a half-finished load is never left pending
# for a later commit to save by accident.
with conn:
    click.executemany(customer_insertion, data_customer)

# Establish Delivery Table with Data

In [6]:
# Drop the table if it is already present so the cell can be re-run cleanly.
click.execute('''DROP TABLE IF EXISTS delivery''')

# Create the delivery table.
# NOTE(review): zip_code has INTEGER affinity, so a ZIP such as '02134' would be
# stored as 2134 (leading zero lost) -- consider TEXT if the data has such ZIPs.
click.execute('''CREATE TABLE delivery (
    package TEXT PRIMARY KEY,
    customer_id INTEGER,
    ordered TIMESTAMP,
    delivered TIMESTAMP,
    payment VARCHAR(25),
    sold REAL,
    first TEXT NOT NULL,
    last TEXT NOT NULL,
    street VARCHAR(75) NOT NULL,
    city TEXT,
    state TEXT,
    zip_code INTEGER,
    purse INTEGER
)''')

# CSV header names, in the same order as the columns of the delivery table.
delivery_columns = (
    'package', 'customer_id', 'ordered', 'delivered', 'payment', 'sold',
    'first', 'last', 'street', 'city', 'state', 'zip_code', 'purse',
)

# The with-block closes the file automatically.
# newline='' is what the csv module asks for, so that line endings embedded in
# quoted fields are handed to the parser untouched.
with open('delivery.csv', 'r', newline='') as file:
    delivery_sheet = csv.DictReader(file)
    # One tuple per CSV row, values ordered to match delivery_columns.
    data_delivery = [
        tuple(row[column] for column in delivery_columns)
        for row in delivery_sheet
    ]


# Parameterised INSERT: one "?" placeholder per delivery column.
delivery_insertion = "INSERT INTO delivery (package, customer_id, ordered, delivered, payment, sold, first, last, street, city, state, zip_code, purse) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"

# Insert all parsed rows. The connection context manager commits on success and
# rolls back if any row fails, so the delivery table is never left half-loaded.
with conn:
    click.executemany(delivery_insertion, data_delivery)

# About the Data

Some fun nuance exists and assumptions are to be made. 

**1. Any item priced below 250.00 dollars must be assumed to be purse hardware.** 
    
    - An item is definitively a purse only if it sold for 250.00 dollars or more, because it is impossible to 
    buy a customizable purse for less than the standard box price of 250.00 dollars. 
    - Prices include taxes and shipping.


**2. We can assume it is possible for the customer address fields to differ from the delivery address fields.**
    
    - Two possible explanations for a mismatch are 1) gifts and 2) fraud.
        - Likewise, it may not be assumed that 'delivery customer first' will always match 'customer first'.


**3. The 'purse' column contains only unique values; the assumption exists that each order is an individual, one-of-a-kind shipment that cannot be reproduced.** 

    - Took the liberty of nixing any and all possibility of Null / NaN values from dataset.... debating this idea now.
    


# Time to Query

This is where I get to know my customers a little bit. As a small business, I personally discuss each order with each customer payment, and it's important I'm able to ask questions to make decisions. 

I must know that all my data has been sent to my database, though!



In [7]:
# Row-count query for the customer table.
customer_cells = '''SELECT COUNT(*) FROM customer'''

# Run the query, then keep the fetched rows (a list of tuples) for the next cell.
click.execute(customer_cells)
customer_rows = click.fetchall()

# Show each raw row tuple exactly as sqlite3 returns it.
for c in customer_rows:
    print(c)

(100,)


In [8]:
# Unwrap the one-element row tuple so the count prints as a bare number.

# Format template for a single value; the next cell reuses it.
fmt = "{0}"
for cr in customer_rows:
    # Rebind `cr` to the count as an int so it can be added to other counts later.
    cr = int(fmt.format(cr[0]))
    print(cr)
    

100


So I know all 100 rows of customer data made it to my database. Let me just double-check my delivery sheet as well by adding my delivery sheet count. I know the database is perfect if it has exactly 200 rows available. 

In [9]:
# Count the delivery rows and print the combined total with the customer count
# (`cr`, left over from the previous cell).
delivery_count_rows = click.execute('''SELECT COUNT(*) FROM delivery''').fetchall()
for d in delivery_count_rows:
    # Cast to int so the two counts can be added together.
    delivery_cells = int(fmt.format(d[0]))
    print(cr + delivery_cells)

200


Perfect! So I know that all 200 rows across my two csv files have successfully transferred into my database, with 100 records per table. 


For my first order of business, I want to send a personalized follow-up gift thanking each customer for their purchase. I decide my preferred method of going about this task is to send a birthday gift to everybody.  

In [11]:
# sql query: every customer's first name and birthdate, earliest birthdate first
birthday_query = '''SELECT first, birthdate
                FROM customer
                ORDER BY birthdate'''

# "name,  date" template; later cells reuse it for their own two-column results
fmt_two = "{0},  {1}"

# The SQL text and the formatted rows get separate names so that neither
# overwrites the other: birthday_list holds one formatted line per customer.
birthday_list = [fmt_two.format(*r) for r in click.execute(birthday_query).fetchall()]

# view query result
for birthday_line in birthday_list:
    print(birthday_line)


Penni,  1930-01-03
Raymund,  1930-10-02
Adelle,  1932-01-24
Hugues,  1933-09-09
Chevy,  1935-01-28
Umberto,  1935-04-10
Hana,  1935-11-19
Alisander,  1936-09-18
Harbert,  1937-08-21
Karissa,  1938-10-04
Pierson,  1939-12-20
Clayton,  1940-01-26
Doy,  1940-05-18
Claretta,  1941-02-22
Licha,  1941-05-28
Tersina,  1941-10-27
Rustin,  1942-06-18
Harriott,  1943-03-31
Gaile,  1944-07-12
Fowler,  1945-01-31
Ginger,  1946-08-24
Fonz,  1946-11-05
Marin,  1948-06-20
Darryl,  1949-03-26
Clemence,  1949-09-15
Garrot,  1950-03-11
Matilde,  1950-04-10
Branden,  1951-11-27
Perrine,  1952-11-22
Jehu,  1953-05-08
Christean,  1953-10-18
Aloysius,  1955-06-26
Matias,  1957-04-16
Celie,  1957-06-11
Kara,  1958-12-05
Chryste,  1959-12-15
Tadeas,  1961-04-12
Kara-lynn,  1962-01-09
Jody,  1962-10-04
Kare,  1962-12-23
Rivkah,  1963-05-05
Grannie,  1966-02-02
Kayle,  1966-06-20
Vittorio,  1966-09-21
Cayla,  1967-12-19
Thomasine,  1968-08-25
Therese,  1968-10-08
Juliane,  1968-10-29
Rea,  1968-11-05
Doralin,  

# Let me ensure I'm not sending out multiple gift cards to the same customer. 

In [12]:
# Count the unique email addresses in the customer table.
# DISTINCT has to sit inside the aggregate: "SELECT DISTINCT COUNT(email)" only
# de-duplicates the single result row, so it reports the number of non-NULL
# emails even when some of them are repeated.
email_repeat_query = '''SELECT COUNT(DISTINCT email) FROM customer'''

# Single-value template (note the trailing space in the printed text).
count_fmt = "{0} "

for count_row in click.execute(email_repeat_query).fetchall():
    unique_email_count = count_fmt.format(*count_row)
    print(unique_email_count)

100 


I confirmed with the email_repeat_query  that I have exactly 100 unique emails accounted for within this customer table.

In [13]:
# For every last name that appears more than once, return how many customers
# share it. The grouping is on `last` because the check is about repeated last
# names; the count is aliased as `repeats` so it cannot be confused with the
# `last` column itself.
names_match_query = '''SELECT COUNT(last) AS repeats
        FROM customer
        GROUP BY last
        HAVING COUNT(last) > 1'''

# Single-value template (note the trailing space in the printed text).
repeat_fmt = "{0} "

# Nothing is printed when no last name is shared by two or more customers.
for repeat_row in click.execute(names_match_query).fetchall():
    repeat_names_count = repeat_fmt.format(*repeat_row)
    print(repeat_names_count)



If nothing prints from the repeat_names_count, then I have confirmed that at no point in my customer table does a customer's last name repeat either, so I doubt I have any duplicate results for the data in my customer table as far as contact information. 

#### I have two types of gifts available:

1. All-purpose gift cards with set balances of 15.00 dollars
2. iTunes gift cards with set balances of 15.00 dollars

To help me split the population between these two options, I choose an assumption on which to base my condition.

I make an assumption that older generations will enjoy general purpose giftcards more than iTunes-specific giftcards. 


In [14]:
# Pretending it's September now, the list has to be filtered by month as well,
# starting with October.

# sql query: October birthdays (month part "-10-") dated 1994-12-31 or earlier
query_october_cards = ('''SELECT first, birthdate
                FROM customer
                WHERE (birthdate LIKE '%-10-%') AND (birthdate <= '1994-12-31')
                ORDER BY birthdate''')

# view query result: fetch first, then format and print one line per row
october_card_rows = click.execute(query_october_cards).fetchall()
for octc in october_card_rows:
    october_cards = fmt_two.format(octc[0], octc[1])
    print(october_cards)

Raymund,  1930-10-02
Karissa,  1938-10-04
Tersina,  1941-10-27
Christean,  1953-10-18
Jody,  1962-10-04
Therese,  1968-10-08
Juliane,  1968-10-29
Aaren,  1984-10-12
Goddart,  1993-10-12


In [15]:
# Next, the October iTunes cards to prepare.

# sql query: October birthdays (month part "-10-") dated after 1994-12-31
query_oct_itunes = ('''SELECT first, birthdate
                FROM customer
                WHERE (birthdate LIKE '%-10-%') AND (birthdate > '1994-12-31')
                ORDER BY birthdate''')

# view query result: fetch first, then format and print one line per row
october_itunes_rows = click.execute(query_oct_itunes).fetchall()
for octunes in october_itunes_rows:
    october_itunes_cards = fmt_two.format(octunes[0], octunes[1])
    print(october_itunes_cards)



Merna,  1998-10-19


# Create an email list

In [16]:
# sql query: first name and email of every customer with an October birthdate,
# in birthdate order
query_oct_emails = ('''SELECT first, email
                FROM customer
                WHERE (birthdate LIKE '%-10-%')
                ORDER BY birthdate''')

# view query result: fetch first, then format and print one line per row
october_email_rows = click.execute(query_oct_emails).fetchall()
for againagain in october_email_rows:
    october_emails = fmt_two.format(againagain[0], againagain[1])
    print(october_emails)

Raymund,  rpostlethwaite2@nasa.gov
Karissa,  kwannes22@sciencedaily.com
Tersina,  tyoud1v@joomla.org
Christean,  csanchez1d@g.co
Jody,  jwillatts0@wordpress.com
Therese,  tchristmas4@1und1.de
Juliane,  jbirtle25@cnn.com
Aaren,  akemmr@patch.com
Goddart,  grootc@columbia.edu
Merna,  mshakesby1s@flavors.me


In [17]:
# TODO: close the database connection once the notebook's analysis is complete.
# NOTE(review): presumably left disabled so `conn` and `click` stay usable while
# more cells are still being added -- confirm before re-enabling.
#conn.close()

### Now I want to merge and join tables around to get some deeper insight into the data. Maybe find a way to export a document here -- maybe hold query files imported into notebook?

## do pandas explorations /// make in-notebook graphs using matplotlib highlighting sales / questions

## get familiar with tableau / geographic display? Time of year graphs?