In [6]:
import pathlib
import sqlite3
import pandas as pd
import pandas_datareader.data as pdr
from datetime import datetime as dt

In [7]:
## IMPORT LOCAL CSV DATA

In [8]:
# Location of the Iris dataset, given relative to the working directory and
# then made absolute.
iris_relative = pathlib.Path('../datasets/iris.csv')
iris_csv = iris_relative.absolute()

In [9]:
# Load the Iris CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=iris_csv)
df.head(n=5)

In [5]:
# Group the rows by their 'class' label; the grouping object is kept in
# `iris_classes` so that it can be reused.
iris_classes = df.groupby(by='class')
# Number of rows in each class.
iris_classes.size()

In [None]:
# Descriptive statistics computed separately for each group of the
# `iris_classes` grouping (one block of summary rows per class).
iris_classes.describe()

In [None]:
# Remove all rows having sepal_width_cm < 3.0.
# The comparison is inclusive (>=) so rows measuring exactly 3.0 are kept;
# a strict `>` would silently drop them as well.
# NOTE: `df` is rebound here, so every later cell works on the filtered frame.
df = df[df.sepal_width_cm >= 3.0]

In [None]:
## EXPORT TO LOCAL FILES

In [None]:
# CSV export: write the current DataFrame to output.csv in the working directory.
target_csv = pathlib.Path('output.csv')
df.to_csv(path_or_buf=target_csv)

In [None]:
# JSON export: let pandas serialize the DataFrame straight into output.json
# instead of opening the file by hand.
target_json = pathlib.Path('output.json')
df.to_json(target_json)

In [None]:
# SQLite database: dump the DataFrame to a new DB table.
db = 'db.sqlite3'
table_name = 'Iris'
# `with conn:` on a sqlite3 connection only scopes the transaction (commit on
# success, rollback on error); it does NOT close the connection, so the
# connection is closed explicitly in `finally`.
conn = sqlite3.connect(db)
try:
    with conn:
        # 'replace' recreates the table on each run, so re-executing this cell
        # does not keep appending duplicate rows to an existing table.
        df.to_sql(table_name, conn, if_exists="replace")
finally:
    conn.close()

In [None]:
# Now let's check that the table has been created by reading its rows back.
# The connection is closed explicitly: using a sqlite3 connection as a context
# manager would only manage the transaction and leave the connection open.
conn = sqlite3.connect(db)  # `db` is already a str, no conversion needed
try:
    cursor = conn.cursor()
    # Table names cannot be bound as SQL parameters, hence the string
    # formatting; `table_name` is a constant defined in this notebook.
    cursor.execute('SELECT * FROM {}'.format(table_name))
    stored_rows = cursor.fetchall()
finally:
    conn.close()
print(stored_rows)

In [None]:
## IMPORT DIRTY DATASETS (eg. with missing fields)

In [None]:
# Read the "dirty" Iris CSV, treating empty fields as missing values, and
# preview the first ten rows.
dirty_relative = pathlib.Path('../datasets/dirty_iris.csv')
dirty_iris_csv = dirty_relative.absolute()
dirty_df = pd.read_csv(filepath_or_buffer=dirty_iris_csv, na_values=[''])
dirty_df.head(n=10)

In [None]:
# Boolean mask with the same shape as `dirty_df`: True wherever a value is missing.
dirty_df.isnull()

In [None]:
## IMPORT DATA FROM THE WEB

In [None]:
# Fetch Apple (AAPL) stock data from the 'morningstar' source for
# 2018-01-01 .. 2018-02-28 and preview the first rows.
# NOTE(review): the 'morningstar' data source may no longer be supported by
# current pandas-datareader releases — confirm against the installed version.
start = dt(year=2018, month=1, day=1)
end = dt(year=2018, month=2, day=28)
aapl = pdr.DataReader(name='AAPL', data_source='morningstar', start=start, end=end)
aapl.head(n=5)