In [24]:
import pandas as pd
import sqlite3
from pathlib import Path

# Path setup

In [14]:
# Directory holding every input file; relative to the notebook's working directory.
DATA_DIR = Path("..") / "data"

# Read data
I'll go ahead and read the following files to collect data:
- real_estate.csv
- colors.json
- books.xml
- chinook.sqlite3

**Note:** For the SQLite3 data we do not read the entire database. Databases usually hold a huge amount of data, and loading all of it into a dataframe (which resides in RAM) is not a good idea.

In [28]:
# Reading CSV
data_real_estate = pd.read_csv(DATA_DIR / "real_estate.csv")

# Reading JSON
data_colors = pd.read_json(DATA_DIR / "colors.json")

# Reading XML
data_books = pd.read_xml(DATA_DIR / "books.xml")

# Reading SQLite3 (first 5 entries only)
# The query result is fully materialised into a dataframe, so the connection
# is closed right after the read instead of being left open for the lifetime
# of the kernel. A sqlite3 connection used as a context manager only manages
# the transaction and does not close itself, hence the explicit try/finally.
query = 'SELECT firstname, lastname, address FROM customers LIMIT 5;'
db_connection = sqlite3.connect(DATA_DIR / "chinook.sqlite3")
try:
    data_chinook = pd.read_sql_query(query, db_connection)
finally:
    db_connection.close()

# Peeking into data
I'll peek into the data and display the first 5 rows of each.

## 1. Real Estate Data

In [20]:
# Preview the first five rows (the default for head()).
data_real_estate.head()

Unnamed: 0,street,city,zip,state,beds,baths,sq__ft,type,sale_date,price,latitude,longitude
0,3526 HIGH ST,SACRAMENTO,95838,CA,2,1,836,Residential,Wed May 21 00:00:00 EDT 2008,59222,38.631913,-121.434879
1,51 OMAHA CT,SACRAMENTO,95823,CA,3,1,1167,Residential,Wed May 21 00:00:00 EDT 2008,68212,38.478902,-121.431028
2,2796 BRANCH ST,SACRAMENTO,95815,CA,2,1,796,Residential,Wed May 21 00:00:00 EDT 2008,68880,38.618305,-121.443839
3,2805 JANETTE WAY,SACRAMENTO,95815,CA,2,1,852,Residential,Wed May 21 00:00:00 EDT 2008,69307,38.616835,-121.439146
4,6001 MCMAHON DR,SACRAMENTO,95824,CA,2,1,797,Residential,Wed May 21 00:00:00 EDT 2008,81900,38.51947,-121.435768


## 2. Colors data

In [22]:
# Preview the first five rows (the default for head()).
data_colors.head()

Unnamed: 0,color,value
0,red,#f00
1,green,#0f0
2,blue,#00f
3,cyan,#0ff
4,magenta,#f0f


## 3. Books data

In [23]:
# Preview the first five rows (the default for head()).
data_books.head()

Unnamed: 0,id,author,title,genre,price,publish_date,description
0,bk101,"Gambardella, Matthew",XML Developer's Guide,Computer,44.95,2000-10-01,An in-depth look at creating applications \n ...
1,bk102,"Ralls, Kim",Midnight Rain,Fantasy,5.95,2000-12-16,"A former architect battles corporate zombies, ..."
2,bk103,"Corets, Eva",Maeve Ascendant,Fantasy,5.95,2000-11-17,After the collapse of a nanotechnology \n ...
3,bk104,"Corets, Eva",Oberon's Legacy,Fantasy,5.95,2001-03-10,"In post-apocalypse England, the mysterious \n ..."
4,bk105,"Corets, Eva",The Sundered Grail,Fantasy,5.95,2001-09-10,"The two daughters of Maeve, half-sisters, \n ..."


## 4. Chinook data
Since we only extracted 5 entries from the DB, we don't need `head` in this case and can display the entire dataframe.

In [33]:
# The SQL query already applied LIMIT 5, so the whole frame is displayed.
data_chinook

Unnamed: 0,FirstName,LastName,Address
0,Luís,Gonçalves,"Av. Brigadeiro Faria Lima, 2170"
1,Leonie,Köhler,Theodor-Heuss-Straße 34
2,François,Tremblay,1498 rue Bélanger
3,Bjørn,Hansen,Ullevålsveien 14
4,František,Wichterlová,Klanova 9/506
