In [1]:
import getpass
import os
from urllib.parse import quote_plus
from warnings import filterwarnings

import pandas as pd
import pymysql
from sqlalchemy import create_engine, text

filterwarnings('ignore', category=pymysql.Warning)

In [2]:
# Relative paths of the three word-association CSV exports, one per source,
# all living in the local `resources` folder.
file_name1, file_name2, file_name3 = (
    os.path.join('resources', csv_name)
    for csv_name in (
        'WordAssociation-AC.csv',
        'WordAssociation-BC.csv',
        'WordAssociation-CC.csv',
    )
)

* **Instructions**:

    * Create a **`word_association`** database.
    
    * Create a **`words`** table, and import the three .csv files into this table.
    
    * Use the SQL command `alter table words add id int primary key auto_increment;` to add a new auto-incrementing primary key column named **`id`**.

    * Create a query that collects all of the rows whose "source" is "AC"

    * Create a query that collects all of the rows whose "source" is "BC"

    * Create a query that collects all of the rows whose "source" is "CC"

    * Create a query that collects all of the rows whose "author" is in the range 0-20 (inclusive)

    * Create a query that searches for any rows that have "pie" in their "word1" or "word2" columns

* **Bonus**:

    * There are some functions in MySQL that allow users to perform simple calculations, like `MIN()`, `MAX()`, `COUNT()`, `AVG()`, and `SUM()`. Look some of these functions up and then perform the following calculations:

    * Find the total number of records in the `words` table using the `MAX()` function (hint: the auto-incremented `id` of the last row equals the row count)

    * Find the lowest "ID" for those rows with a "source" of "CC"

    * Count how many rows have an "author" of 12

In [3]:
# Connection settings come from the environment so that no credentials are
# stored in the notebook; the password falls back to an interactive prompt.
# The user and host defaults match the original local setup (root@localhost).
db_user = os.environ.get('MYSQL_USER', 'root')
db_password = os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: ')
db_host = os.environ.get('MYSQL_HOST', 'localhost')

# Percent-encode the credentials so characters such as '@' or '/' in a
# password cannot corrupt the connection URL.
server_url = 'mysql+pymysql://{}:{}@{}'.format(
    quote_plus(db_user), quote_plus(db_password), db_host
)

# Connect to the server without selecting a schema, only to make sure the
# database exists. Engine.execute() was removed in SQLAlchemy 2.0, so the
# statement runs on an explicit connection, which works on 1.x and 2.x alike.
server_engine = create_engine(server_url)
with server_engine.begin() as connection:
    connection.execute(text("create database if not exists word_association"))
server_engine.dispose()  # release the bootstrap engine's pooled connection

# From here on `engine` is bound to the word_association database.
engine = create_engine(server_url + '/word_association')

In [4]:
# Load the three source CSVs and stack them into a single frame.
all_files = [file_name1, file_name2, file_name3]
data = [pd.read_csv(csv_path) for csv_path in all_files]

# ignore_index=True gives the combined frame one continuous 0..N-1 index.
# Without it every file contributes its own 0-based labels, so the labels
# repeat and label-based lookups (.loc) become ambiguous.
# NOTE(review): the displayed output shows an 'Unnamed: 0' column, so the CSVs
# presumably contain a saved index column; it is kept here because the
# `words` table shown further down includes it.
data_frame = pd.concat(data, ignore_index=True)

In [5]:
# Preview only the first ten rows instead of rendering the whole combined frame.
data_frame.head(10)

Unnamed: 0,author,word1,word2,source
0,289,maudie,atticus,AC
1,312,limpet,mine,AC
2,235,magneto,fiction,AC
3,312,john deere,tractor,AC
4,218,read,library,AC
5,262,composition,computer,AC
6,248,toilet,break,AC
7,235,elusive,butterfly,AC
8,39,fire,habanero,AC
9,42,boat,sink,AC


In [6]:
# Store the combined frame as the `words` table. An existing `words` table is
# replaced, and the DataFrame index is not written as a column.
data_frame.to_sql('words', engine, if_exists='replace', index=False)

In [7]:
# Add an auto-incrementing integer primary key column named `id` to `words`.
# Engine.execute() was removed in SQLAlchemy 2.0; running the statement on an
# explicit connection with text() works on both 1.x and 2.x.
# The ALTER fails if `id` already exists, so re-run the to_sql cell (which
# replaces the table) before executing this cell a second time.
with engine.begin() as connection:
    connection.execute(text("alter table words add id int primary key auto_increment;"))


<sqlalchemy.engine.result.ResultProxy at 0x1e690971160>

In [8]:
# Read the whole `words` table back from the database and preview the first rows.
new_df = pd.read_sql_query(
    sql='select * from words',
    con=engine,
)
new_df.head()

Unnamed: 0,author,word1,word2,source,id
0,289,maudie,atticus,AC,1
1,312,limpet,mine,AC,2
2,235,magneto,fiction,AC,3
3,312,john deere,tractor,AC,4
4,218,read,library,AC,5


In [9]:
# Rows whose `source` column is "AC".
new_df = pd.read_sql_query(
    sql='select * from words where source = "AC"',
    con=engine,
)
new_df.head()

Unnamed: 0,author,word1,word2,source,id
0,289,maudie,atticus,AC,1
1,312,limpet,mine,AC,2
2,235,magneto,fiction,AC,3
3,312,john deere,tractor,AC,4
4,218,read,library,AC,5


In [10]:
# Rows whose `source` column is "BC".
new_df = pd.read_sql_query(
    sql='select * from words where source = "BC"',
    con=engine,
)
new_df.head()

Unnamed: 0,author,word1,word2,source,id
0,390,substantiate,authenticate,BC,20800
1,663,speed limit,laws,BC,20801
2,331,britches,buckle,BC,20802
3,341,tatonka,tonka,BC,20803
4,635,evidence,deed,BC,20804


In [11]:
# Rows whose `source` column is "CC".
new_df = pd.read_sql_query(
    sql='select * from words where source = "CC"',
    con=engine,
)
new_df.head()

Unnamed: 0,author,word1,word2,source,id
0,743,prim,proper,CC,39873
1,727,squirrels,birds i put out food for the birds,CC,39874
2,728,admiral byrd,admiral nelson,CC,39875
3,728,whiskey,irish,CC,39876
4,736,men,women,CC,39877


In [12]:
# Rows whose `author` value lies between 0 and 20, both bounds included.
new_df = pd.read_sql_query(
    sql='select * from words where author >=0 and author <=20',
    con=engine,
)
new_df.head()

Unnamed: 0,author,word1,word2,source,id
0,12,round,top,AC,13
1,12,deceit,catfish,AC,14
2,12,head,gasket,AC,19
3,12,sheep,wool,AC,24
4,12,hilarious,monty python,AC,37


In [13]:
# Rows where either word of the pair is exactly "pie".
new_df = pd.read_sql_query(
    sql='select * from words where word1 = "pie" or word2 = "pie"',
    con=engine,
)
new_df.head()


Unnamed: 0,author,word1,word2,source,id
0,12,pie,crust,AC,1757
1,235,apple,pie,AC,1971
2,235,pie,patty cake patty cake,AC,3402
3,12,pie,crust,AC,8082
4,172,pizza,pie,AC,8193
