# Get Data 
> A notebook for downloading data from official sources and unzipping it into machine-readable formats

- toc: true 
- badges: false
- comments: true
- categories: [jupyter]
- author: Nirant Kasliwal and Meghana Bhange
<!-- - image: images/chart-preview.png -->

In [None]:
# hide
# !pip install requests
# !pip install pydantic
# !pip install tqdm

In [None]:
# hide_input
import json
from pathlib import Path
from typing import List, Union

import requests
from pydantic import BaseModel
from tqdm.notebook import tqdm

## Define the Book Class with metadata info

In [None]:
# collapse_input
class Book(BaseModel):
    """
    Metadata for one downloadable book archive, with helpers to fetch and
    extract it.

    ``zip_file_name`` starts empty and is filled in by :meth:`download`;
    :meth:`unzip` reads it to locate the archive.
    """

    id: int
    subject: str = "History"
    class_or_grade: Union[int, None] = None
    url: str = ""
    title: str = ""
    origin: str = ""
    # Location of the downloaded archive; empty until download() succeeds
    # (or until the caller sets it by hand).
    zip_file_name: str = ""

    def download(
        self,
        file_path: Union[str, Path] = ".",
        file_name: Union[None, str] = None,
        timeout: Union[None, float] = None,
    ) -> bool:
        """
        Download ``self.url`` and save the response body to disk.

        Args:
            file_path: Directory to save into. Created if it does not exist.
            file_name: Name for the saved file. When ``None``, the last
                component of the URL path is used.
            timeout: Passed to ``requests.get``; ``None`` waits indefinitely.

        Returns:
            ``True`` when the server answered with HTTP 200 and the file was
            written, ``False`` otherwise. On ``False`` nothing is written and
            ``zip_file_name`` is left untouched.
        """
        from urllib.parse import urlparse

        url = self.url
        r = requests.get(url, timeout=timeout)
        # Do not persist error pages as if they were archives.
        if r.status_code != 200:
            return False

        if file_name is None:
            # Use only the path component so a query string or fragment
            # does not end up in the file name.
            file_name = Path(urlparse(url).path).name
        path = Path(file_path).resolve() / file_name
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("wb") as f:
            f.write(r.content)

        # Store the full path so unzip() finds the archive even when it was
        # saved somewhere other than the current working directory.
        self.zip_file_name = str(path)
        return True

    def unzip(self, extract_to: Union[str, Path] = "../data/"):
        """
        Extract the archive named by ``zip_file_name``.

        Args:
            extract_to: Directory the archive contents are extracted into.

        Raises:
            AssertionError: If ``zip_file_name`` is empty, i.e. nothing has
                been downloaded and the attribute was not set manually.
        """
        import zipfile

        # Raised explicitly rather than via ``assert`` so the check still
        # runs when Python is started with -O.
        if not self.zip_file_name:
            raise AssertionError(
                "Please download the file or set the zip_file_name variable"
            )

        with zipfile.ZipFile(self.zip_file_name, "r") as zip_ref:
            zip_ref.extractall(extract_to)

In [None]:
# collapse
# Name of the sheet whose rows are requested from the storage endpoint.
sheet_name = "History"
# Full URL of the book listing: the storage endpoint followed by the sheet name.
books_list = f"https://api.steinhq.com/v1/storages/5fd49704f62b6004b3eb63a3/{sheet_name}"

In [None]:
# collapse
r = requests.get(books_list)
# Surface an HTTP failure right away as a clear error instead of handing an
# error response to whatever consumes `r` next.
r.raise_for_status()

In [None]:
# collapse
# Decode the response body, then build one Book per decoded entry by passing
# the entry's keys as keyword arguments.
book_records = json.loads(r.text)
ncert_history_books = [Book(**record) for record in book_records]

## Run for all Books

In [None]:
# collapse
failed_downloads = []
for book in tqdm(ncert_history_books):
    # download() reports success as a boolean; only extract archives that
    # were actually fetched, and remember the rest for reporting.
    if book.download():
        book.unzip()
    else:
        failed_downloads.append(book)

# Report failures after the loop so the messages do not interleave with the
# progress bar.
for book in failed_downloads:
    print(f"Download failed for book {book.id}: {book.url}")

In [None]:
# ncert_history_books[1].download()
# ncert_history_books[1].unzip()