# Clean up data using SQLite

For this section, I am cleaning mock data with SQLite.

The `vet_data.txt` file is not in csv format, so I have to use Python to format the file first and pandas to convert to a dataframe.

In [1]:
import pandas as pd
import sqlite3
import os

In [2]:
# Open file, save to f
f = open('vet_data.txt', 'r')

# Save data to variable
content = f.read()

content_replaced = content.replace(";;", "+")

new_patients = content_replaced.replace("//", ",").split(",")

new_patients_split = []

for patient in new_patients:
    new_patients_split.append(patient.split("+"))

# Check work
new_patients_split

[['Pet name', ' owner name', ' pet type', ' pet dob '],
 [' Fluffy', ' Michael Scott', ' cat', ' 5/20/2019 '],
 [' Spots McGoo  ', ' Jan  Levinson ', ' dog', ' 12/1/2015 '],
 [' Speedy   ', ' Ryan Howard ', ' turtle ', ' unknown '],
 [' Gordon Gekko ', ' Ryan Howard ', ' turtle ', ' unknown '],
 [' Winston Churchill ', '   Pam   Beesley ', ' dog  ', ' 3/30/2009 '],
 [' Mr. Whiskers  ', ' Jim Halpert', ' cat ', ' 4/5/2018 '],
 [' Henrietta', ' Dwight   Shrute', ' porcupine  ', ' unknown '],
 [' Sprinkles ', ' Angela Martin  ', ' cat ', ' 7/19/2000 '],
 [' Princess Lady', ' Angela Martin  ', ' cat ', ' 8/4/2017 '],
 ['  Ember ', ' Angela Martin  ', ' cat ', ' 4/3/2015 '],
 ['   Milky Way ', ' Angela Martin  ', ' cat ', ' 11/15/2012 '],
 [' Diane', ' Angela Martin ', ' cat   ', ' 9/21/2015 '],
 [' Lumpy  ', '  Angela  Martin  ', ' cat ', ' 02/07/2012 '],
 [' Petals ', ' Angela Martin  ', ' cat ', ' 10/31/2010 '],
 [' Mr. Ash ', ' Angela Martin ', ' cat ', ' 6/1/2005 '],
 [' Phillip ', '  

In [3]:
df = pd.DataFrame(new_patients_split, columns=['pet', 'owner', 'type', 'dob'])

# Remove 1st row
df = df.iloc[1:]

df

Unnamed: 0,pet,owner,type,dob
1,Fluffy,Michael Scott,cat,5/20/2019
2,Spots McGoo,Jan Levinson,dog,12/1/2015
3,Speedy,Ryan Howard,turtle,unknown
4,Gordon Gekko,Ryan Howard,turtle,unknown
5,Winston Churchill,Pam Beesley,dog,3/30/2009
6,Mr. Whiskers,Jim Halpert,cat,4/5/2018
7,Henrietta,Dwight Shrute,porcupine,unknown
8,Sprinkles,Angela Martin,cat,7/19/2000
9,Princess Lady,Angela Martin,cat,8/4/2017
10,Ember,Angela Martin,cat,4/3/2015


In [None]:
# Write a CSV for SQLite if preferred
df.to_csv('./vet_data.csv')

In [4]:
# Or write to SQL database directly 
conn = sqlite3.connect('vet_database')
c = conn.cursor()

c.execute('CREATE TABLE IF NOT EXISTS patients(pet_id, pet_name, owner_name, pet_type, pet_dob)')
conn.commit()

In [5]:
df.to_sql('patients', conn, index = False)

ValueError: Table 'patients' already exists.

In [None]:
c.execute('''
SELECT *
FROM patients
''')

for row in c.fetchall():
    print(row)