# Example Extraction

This is the full example for the data shown earlier; the spreadsheet is shown again here for convenience.

![Example 1 spreadsheet](./images/example1_spreadsheet.png)

In [None]:

from databaker.framework import *
from tutorialResources.scraper import Scraper

# create a scraper for the page that holds our example data
# (Scraper comes from the tutorial resources; presumably it locates the source file for us)
scraper = Scraper("https://www.fake-website.com/example1")
# leaving the variable on the last line of the cell makes the notebook display it
scraper

In [None]:
# get the source data from the scraper in a form databaker can work with
# (presumably one entry per spreadsheet tab, since we loop over them below)
tabs = scraper.distribution.as_databaker()

# define a list. this is mandatory - we add the result for each tab to it and use it later
tidied_sheets = []

# for each of the selected tabs... do everything that's indented (in this case we only have 1 tab,
# but that isn't common so we'll stick with the typical approach)
for tab in tabs:

    # define a selection of cells as the observations:
    # start at C5, extend the selection down and to the right, then drop any blank cells
    observations = tab.excel_ref('C5').expand(DOWN).expand(RIGHT).is_not_blank()

    # define other selections of cells to be our dimensions
    # (each one starts at a single cell, is extended in one direction, and has blank cells dropped)
    assets = tab.excel_ref('C3').expand(RIGHT).is_not_blank()
    names = tab.excel_ref('B5').expand(DOWN).is_not_blank()
    group = tab.excel_ref('A5').expand(DOWN).is_not_blank()

    # define the relationships of the cells selected as dimensions (relative to the observations)
    # each HDim takes: the selection, the dimension's label, and how/where to look from an observation
    dimensions = [
              HDim(assets, "Assets", DIRECTLY, ABOVE),
              HDim(names, "Name", DIRECTLY, LEFT),
              HDim(group, "Group", CLOSEST, ABOVE)
                 ]

    # Now we process these relationships for this tab (this code rarely changes)
    tidy_sheet = ConversionSegment(tab, dimensions, observations) # < --- processing
    # NOTE(review): the same file name is used for every tab, so with more than one tab
    # each preview would presumably replace the previous one - confirm if that matters
    savepreviewhtml(tidy_sheet, fname="Preview.html")

    tidied_sheets.append(tidy_sheet.topandas()) # <-- adding result of processing this tab to our list

Now let's have a look at the output.

In [None]:

# NOTE - this is not the next step in production (see later examples) but let's see what we've got
import pandas as pd

# join the per-tab results collected in tidied_sheets into a single dataframe
dataframe = pd.concat(tidied_sheets)
# slice off the first 20 rows; as the last line of the cell, the notebook displays them
dataframe[:20]