# load a `csv` file into sql

### Import the necessary libraries

In [1]:
import pymysql
import sqlalchemy as alch # python -m pip install --upgrade 'sqlalchemy<2.0'
from getpass import getpass
import pandas as pd

1. FROM workbench: create a database with the name `shoes`

In [6]:
DROP DATABASE IF EXISTS shoes;
CREATE DATABASE IF NOT EXISTS shoes;

2. Establish a connection to your database through python

In [2]:
password = getpass("Please enter your password: ")

Please enter your password: ········


In [22]:
dbName = "shoes"

In [23]:
connectionData=f"mysql+pymysql://root:{password}@localhost/{dbName}"

In [24]:
engine = alch.create_engine(connectionData)

3. Load the table into pandas: from the datasets folder, get the shoes csv file

In [26]:
df = pd.read_csv("shoes.csv")

In [27]:
df.sample()

Unnamed: 0.1,Unnamed: 0,names,brand,price,links
10,10,Zapatilla Converse Lift Hi Suede,CONVERSE,84.95,https://www.murallasport.com/producto/5578-zap...


In [30]:
df.columns

Index(['Unnamed: 0', 'names', 'brand', 'price', 'links'], dtype='object')

4. Drop un-wanted columns if they exist

In [33]:
df.drop(columns=['Unnamed: 0'], inplace=True)

In [34]:
df.sample()

Unnamed: 0,names,brand,price,links
15,Zapatilla Converse Run Star Hike Hi Gris,CONVERSE,110.0,https://www.murallasport.com/producto/5387-zap...


5. Insert the `df` into your `shoes` database: use a pandas method. You'll need to use some arguments: name & con

In [36]:
df.to_sql("shoes", connectionData, if_exists="replace", index=False)

24

6. From python, query the table back and call it `queried_df`

In [48]:
queried_df = pd.read_sql_query("""
SELECT * 
    FROM shoes;
""", engine)

In [49]:
queried_df.head()

Unnamed: 0,names,brand,price,links
0,Zapatilla Mujer Reebok Classic Leather SP Bl...,ADIDAS,90.0,https://www.murallasport.com/producto/5477-zap...
1,Zapatilla Nike Zoom Air Fire Blanco,NIKE,119.99,https://www.murallasport.com/producto/5998-zap...
2,New Balance 530,NEW BALANCE,110.0,https://www.murallasport.com/producto/6229-new...
3,Zapatillla New Balance CT302 Beige,NEW BALANCE,110.0,https://www.murallasport.com/producto/5330-zap...
4,Zapatilla Mujer New Balance 327 Beige,NEW BALANCE,114.0,https://www.murallasport.com/producto/5473-zap...


7. Does it look okay? You need to get rid of the index. Re-run your code with another argument to prevent that from being created

In [52]:
queried_df = pd.read_sql_query("""
SELECT names, brand, price, links
    FROM shoes;
""", engine, index_col="names")

In [53]:
queried_df.head()

Unnamed: 0_level_0,brand,price,links
names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Zapatilla Mujer Reebok Classic Leather SP Blanco,ADIDAS,90.0,https://www.murallasport.com/producto/5477-zap...
Zapatilla Nike Zoom Air Fire Blanco,NIKE,119.99,https://www.murallasport.com/producto/5998-zap...
New Balance 530,NEW BALANCE,110.0,https://www.murallasport.com/producto/6229-new...
Zapatillla New Balance CT302 Beige,NEW BALANCE,110.0,https://www.murallasport.com/producto/5330-zap...
Zapatilla Mujer New Balance 327 Beige,NEW BALANCE,114.0,https://www.murallasport.com/producto/5473-zap...


7. Now, from python, drop the database AND the table

Reminder: selecting & insertying use different methods. one uses pandas and the other one just the engine.

AttributeError: 'str' object has no attribute 'cursor'

In [60]:
with engine.connect() as connectionData:
    connectionData.execute("DROP DATABASE shoes")

OperationalError: (pymysql.err.OperationalError) (1008, "Can't drop database 'shoes'; database doesn't exist")
[SQL: DROP DATABASE shoes]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

8. Get your code and create a function

In [None]:
def load_into_db (db, table_name, df):
    """This function should: 
    1. Establish the connection to the database
    2. Drop the database if exists and create it again
    3. Insert the table
    4. Return some feedback: how many rows where inserted or the table itself
    """
    pass

8. Try to call the function more than once. Does it work? if so, success!

In [82]:
db="shoes"
df = pd.read_csv("shoes.csv")
password = getpass("Please enter your password: ")

Please enter your password: ········


In [91]:
def load_into_db (db, df):
    connectionData=f"mysql+pymysql://root:{password}@localhost/{db}"
    engine = alch.create_engine(connectionData)
    engine.execute(f"DROP DATABASE IF EXISTS {db}")
    engine.execute(f"CREATE DATABASE {db}")
    df.to_sql(db, connectionData, if_exists="replace", index=False)
    
    engine.execute(f"""USE {db};""")
    count = pd.read_sql_query(f"""select * from {db}""", engine).shape[0]
    return f"These many rows have been inserted: {count}"

In [92]:
load_into_db(db,df)

'These many rows have been inserted: 24'