## PRUEBA NOTEBOOK

In [21]:
import bz2
import csv
import os
import requests

import pandas as pd
import numpy as np

from typing import List

# Directory (under the current working directory) where downloaded data is stored.
LOCAL_DATA_PATH = os.path.join(os.getcwd(), 'data')

base_url = 'https://gist.github.com/juanriaza/9b6f7ab3fc1cbda81100fa5d32512fd8/raw/a61db2d8d81789d5c7040eacee7b225bc2b2aa7f'
file_name = 'data.csv.bz2'

# URLs always use '/' as separator; os.path.join would insert the OS path
# separator (a backslash on Windows) and produce an invalid URL.
url = f"{base_url.rstrip('/')}/{file_name}"
url

'https://gist.github.com/juanriaza/9b6f7ab3fc1cbda81100fa5d32512fd8/raw/a61db2d8d81789d5c7040eacee7b225bc2b2aa7f/data.csv.bz2'

### Download the data from the provided link

In [22]:
# Column separator of the data file; write_to_csv splits and writes rows
# with this same character.
DATA_DELIMITER = '|'
# Text encoding used by fetch_data to decode the decompressed payload.
DATA_ENCODING = 'utf-8'

def fetch_data(url:str, timeout:float = 60.0) -> List:
    """ Download a bz2-compressed text file and return its lines.

    Makes a GET request to the given url, decompresses the response body
    with bz2, decodes it using DATA_ENCODING and splits it into lines.

    It handles the following cases:
        - Get request status has to be <200> Ok
        - There is data within the BZ2

    Args:
        url (str) : location of file to download
        timeout (float) : seconds to wait for the server before giving up

    Returns:
        data (list) : list of rows as string

    Raises:
        FileNotFoundError : the response status code is not 200
        AssertionError : the decompressed file contains no lines
    """

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    rq = requests.get(url, timeout=timeout)

    if rq.status_code != 200:
        raise FileNotFoundError(
            f"GET {url} returned status code {rq.status_code}"
        )

    data = bz2.decompress(rq.content).decode(DATA_ENCODING).splitlines()

    # Raised explicitly (same exception type as the former `assert`) so the
    # check is not stripped when Python runs with -O.
    if not data:
        raise AssertionError(f"No data in file : {url}")

    return data
    

def write_to_csv(data:list, fname:str) -> None:
    """ Given a list of rows, writes a CSV file named `fname` inside
    LOCAL_DATA_PATH.

    Each row is a string whose fields are separated by DATA_DELIMITER; the
    output file uses the same delimiter and the DATA_ENCODING encoding.

    It handles the following cases:
        - Creates the target directory if it does not exist
        - Checks the output file has .csv as extension, if not, it adds it.
        - A row that cannot be split or serialized is reported and skipped

    Args:
        data (list) : rows to write, one delimited string per row
        fname (str) : name of the output file

    Returns
        None
    """
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(LOCAL_DATA_PATH, exist_ok=True)

    if not fname.endswith('.csv'):
        fname += '.csv'

    path = os.path.join(LOCAL_DATA_PATH, fname)
    written = 0

    # newline='' is required by the csv module so it controls line endings
    # itself (otherwise blank rows appear on Windows). The encoding matches
    # the one used to decode the downloaded data.
    with open(path, 'w', newline='', encoding=DATA_ENCODING) as f:
        writer = csv.writer(f, delimiter=DATA_DELIMITER)

        for row in data:
            try:
                writer.writerow(row.split(DATA_DELIMITER))
            except (AttributeError, TypeError, UnicodeError, csv.Error) as e:
                # Best effort: report the malformed row and keep going.
                # I/O errors are not caught here and propagate to the caller.
                print(e)
            else:
                written += 1

    print(f"Written {written} lines at : {path}")


In [23]:
# Download the compressed dataset and store it as data.csv in LOCAL_DATA_PATH.
write_to_csv(data=fetch_data(url), fname='data.csv')

Written 122375 lines at : /home/laura/Pruebas/technical_test/data/data.csv


### Set up a SQLite database locally and ingest the CSV file

In [8]:
import sqlite3
import sqlalchemy

from pandas.io import sql

%load_ext sql

In [10]:
%config SqlMagic.autocommit=False

In [11]:
%sql sqlite:///database.db

In [19]:
# step 2 - load the data into SQLite

bytes