## Imports and configuration

In [47]:
import pandas as pd
import deltalake
import os

def data_layer_to_pantas_df(data_layer_path: str) -> pd.DataFrame:
    """Open the Delta table at ``data_layer_path`` and return it as a DataFrame.

    Args:
        data_layer_path: Location of the Delta table to open.

    Returns:
        The table's contents as a ``pandas.DataFrame``.
    """
    delta_table = deltalake.DeltaTable(data_layer_path)
    return delta_table.to_pandas()

# Locations of the three lakehouse layers, relative to the working directory.
bronze_layer_path, silver_layer_path, golden_layer_path = (
    "./lakehouse/bronze_layer/",  # directory scanned for .json files
    "./lakehouse/silver_layer",   # read as a Delta table
    "./lakehouse/golden_layer",   # read as a Delta table
)

## Golden Layer

In [31]:
# Read the golden-layer Delta table into memory and report its dimensions.
df_golden_layer = data_layer_to_pantas_df(golden_layer_path)

# A single print call with a newline separator emits the same three lines.
print(
    "----------------------------------------------------",
    f" Golden layer shape: { df_golden_layer.shape} (rows, cols)",
    "----------------------------------------------------",
    sep="\n",
)

----------------------------------------------------
 Golden layer shape: (5116, 6) (rows, cols)
----------------------------------------------------


In [32]:
# Summarize the golden-layer DataFrame: column dtypes, non-null counts, memory usage.
df_golden_layer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5116 entries, 0 to 5115
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   brewery_type       5116 non-null   object             
 1   country            5116 non-null   object             
 2   state              5116 non-null   object             
 3   city               5116 non-null   object             
 4   breweries_count    5116 non-null   int32              
 5   last_updated_gold  5116 non-null   datetime64[us, UTC]
dtypes: datetime64[us, UTC](1), int32(1), object(4)
memory usage: 220.0+ KB


In [28]:
# Add up the per-group counts to get the overall number of breweries.
total_breweries_count = df_golden_layer['breweries_count'].sum()

print("----------------------------------------------------")
print(" Total breweries count:", total_breweries_count)
print("----------------------------------------------------")

# Ten rows with the highest brewery count, re-indexed from 0 for display.
(
    df_golden_layer
    .sort_values(by='breweries_count', ascending=False)
    .head(10)
    .reset_index(drop=True)
)

----------------------------------------------------
 Total breweries count: 8323
----------------------------------------------------


Unnamed: 0,brewery_type,country,state,city,breweries_count,last_updated_gold
0,micro,united states,colorado,denver,58,2024-10-21 20:47:56+00:00
1,micro,united states,washington,seattle,45,2024-10-21 20:47:56+00:00
2,micro,united states,california,san diego,45,2024-10-21 20:47:56+00:00
3,micro,united states,oregon,portland,41,2024-10-21 20:47:56+00:00
4,micro,united states,texas,austin,30,2024-10-21 20:47:56+00:00
5,brewpub,united states,oregon,portland,29,2024-10-21 20:47:56+00:00
6,micro,united states,illinois,chicago,28,2024-10-21 20:47:56+00:00
7,bar,singapore,singapore,singapore,28,2024-10-21 20:47:56+00:00
8,micro,united states,minnesota,minneapolis,24,2024-10-21 20:47:56+00:00
9,micro,united states,texas,houston,23,2024-10-21 20:47:56+00:00


## Silver Layer

In [33]:
# Read the silver-layer Delta table into memory and report its dimensions.
df_silver_layer = data_layer_to_pantas_df(silver_layer_path)

# A single print call with a newline separator emits the same three lines.
print(
    "----------------------------------------------------",
    f" Silver layer shape: { df_silver_layer.shape} (rows, cols)",
    "----------------------------------------------------",
    sep="\n",
)

----------------------------------------------------
 Silver layer shape: (8323, 16) (rows, cols)
----------------------------------------------------


In [34]:
# Summarize the silver-layer DataFrame: column dtypes, non-null counts, memory usage.
df_silver_layer.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8323 entries, 0 to 8322
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype              
---  ------               --------------  -----              
 0   id                   8323 non-null   object             
 1   name                 8323 non-null   object             
 2   brewery_type         8323 non-null   object             
 3   address_1            7558 non-null   object             
 4   address_2            106 non-null    object             
 5   address_3            35 non-null     object             
 6   city                 8323 non-null   object             
 7   postal_code          8323 non-null   object             
 8   country              8323 non-null   object             
 9   longitude            5997 non-null   float64            
 10  latitude             5997 non-null   float64            
 11  phone                7475 non-null   object             
 12  website_url         

In [35]:
# Preview the first rows of the silver-layer DataFrame.
df_silver_layer.head()

Unnamed: 0,id,name,brewery_type,address_1,address_2,address_3,city,postal_code,country,longitude,latitude,phone,website_url,state,street,last_updated_silver
0,fcd9312c-7531-4801-b481-52f4267cf628,Gösser Brauerei (Heineken),large,brauhausgasse 1,,,leoben,8700,austria,15.093344,47.362556,438102069791,https://www.goesser.at,steiermark,brauhausgasse 1,2024-10-21 20:47:05+00:00
1,eb82907e-7f0d-4453-b07e-4e00763e1cf6,Schladminger Brauerei (Heineken),large,hammerfeldweg 163,,,schladming,8970,austria,13.685049,47.390297,433687225910,https://www.schladmingerbier.at,steiermark,hammerfeldweg 163,2024-10-21 20:47:05+00:00
2,4c8dff75-fa8c-41ce-9999-1f063b59addb,Schwechater Brauerei (Heineken),large,mautnermarkhofstrae 11,,,schwechat,2320,austria,16.467173,48.144924,431701400,https://www.schwechater.at,niedersterreich,mautnermarkhofstrae 11,2024-10-21 20:47:05+00:00
3,c88759b6-88d6-4172-bed7-61dec61e50aa,Schleppe Brauerei (Heineken),large,schleppeplatz 1,,,klagenfurt am wrthersee,9020,austria,14.286632,46.641697,4346342700,http://www.schleppe.at,krnten,schleppeplatz 1,2024-10-21 20:47:05+00:00
4,16fb2c6e-c7e1-4d46-b9d0-1c86816eb129,Die Bierbotschaft,brewpub,ponigler str 52,,,wundschuh,8142,austria,15.455187,46.912914,436763530560,https://www.bierbotschaft.at,steiermark,ponigler str 52,2024-10-21 20:47:05+00:00


## Bronze Layer

In [51]:
# Collect the bronze-layer JSON files. os.listdir returns entries in arbitrary
# order, so sort the names to keep the concatenated row order reproducible.
json_files = sorted(f for f in os.listdir(bronze_layer_path) if f.endswith('.json'))

# Fail with a clear message instead of pandas' "No objects to concatenate".
if not json_files:
    raise FileNotFoundError(f"No .json files found in {bronze_layer_path!r}")

# Each file is parsed as a regular JSON document (lines=True was removed).
dfs = [pd.read_json(os.path.join(bronze_layer_path, f)) for f in json_files]

# Stack all files into one frame with a fresh 0..n-1 index.
df_bronze_layer = pd.concat(dfs, ignore_index=True)


print("----------------------------------------------------")
print(f" Bronze layer shape: {df_bronze_layer.shape} (rows, cols)")
print("----------------------------------------------------")

----------------------------------------------------
 Bronze layer shape: (8323, 16) (rows, cols)
----------------------------------------------------


In [49]:
# Summarize the bronze-layer DataFrame: column dtypes, non-null counts, memory usage.
df_bronze_layer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8323 entries, 0 to 8322
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              8323 non-null   object 
 1   name            8323 non-null   object 
 2   brewery_type    8323 non-null   object 
 3   address_1       7558 non-null   object 
 4   address_2       106 non-null    object 
 5   address_3       35 non-null     object 
 6   city            8323 non-null   object 
 7   state_province  8323 non-null   object 
 8   postal_code     8323 non-null   object 
 9   country         8323 non-null   object 
 10  longitude       5997 non-null   float64
 11  latitude        5997 non-null   float64
 12  phone           7475 non-null   object 
 13  website_url     7132 non-null   object 
 14  state           8323 non-null   object 
 15  street          7558 non-null   object 
dtypes: float64(2), object(14)
memory usage: 1.0+ MB


In [50]:
# Preview the first rows of the bronze-layer DataFrame.
df_bronze_layer.head()

Unnamed: 0,id,name,brewery_type,address_1,address_2,address_3,city,state_province,postal_code,country,longitude,latitude,phone,website_url,state,street
0,d7ae324d-dcc4-4b52-b857-35dca78f8c37,Black Fire Winery,micro,1261 E Munger Rd,,,Tecumseh,Michigan,49286-8714,United States,,,5174249232,,Michigan,1261 E Munger Rd
1,f6cc21c0-039d-4257-9869-b168502185d6,Black Flag Brewing Company,micro,9315 Snowden River Pkwy,,,Columbia,Maryland,21046-2091,United States,-76.810364,39.192524,4438645139,http://www.blackflagbrewingco.com,Maryland,9315 Snowden River Pkwy
2,866e9938-a7ea-41c1-a2b9-81ecb26aa3b0,Black Fleet Brewing,micro,2302 Fawcett Ave,,,Tacoma,Washington,98402,United States,-122.44036,47.24082,2533271641,http://www.blackfleetbrewing.com,Washington,2302 Fawcett Ave
3,a0fadb5a-1b4c-4e40-9782-27f06c10a52d,Black Forest Brew Haus,brewpub,2015 New Hwy,,,Farmingdale,New York,11735-1103,United States,-73.414433,40.755052,6313919500,http://www.blackforestbrewhaus.com,New York,2015 New Hwy
4,d3eba4df-b289-4ee6-ada9-da33ef0567aa,Black Forest Brewery,brewpub,301 W Main St,,,Ephrata,Pennsylvania,17522-1713,United States,-76.183579,40.181767,7174507217,http://www.blackforestbrewery.net,Pennsylvania,301 W Main St
