In [1]:
import os
import sys

from moredata.enricher import EnricherBuilder, Enricher
from moredata.enricher.sql_connector import SqlConnector
from moredata.models.data import JsonData, GeopandasData
from moredata.parser import parse_document
from moredata.utils import read_json_from_file, Converter, write_json_generator_to_json
from moredata.datasets import get_path



In [2]:
import pandas as pd

# Load the main Airbnb Berlin listings (path resolved by moredata's get_path)
# and keep only the rows where both coordinates are present.
df = pd.read_csv(get_path("airbnb-berlin-main"))
df = df.loc[df["latitude"].notna() & df["longitude"].notna()]

# to_json does not create missing parent directories, so make sure ./data
# exists; otherwise this cell fails on a fresh checkout.
os.makedirs("./data", exist_ok=True)

# Export the first 100 remaining listings as a JSON array of records.
df.iloc[0:100].to_json("./data/airbnb-berlin.json", orient="records")

In [3]:
from sqlalchemy import create_engine

# Database connection string. It can be overridden with the MOREDATA_DB_URL
# environment variable so that real credentials never have to be written into
# the notebook; the fallback is the local development URL used by this demo.
URL = os.environ.get(
    "MOREDATA_DB_URL", "mysql+pymysql://root:root@localhost:3306/moredata"
)
engine = create_engine(URL, echo=False)

# Load the extra listing attributes and (re)create the `extra` table from them.
# The DataFrame index is written as well (pandas default), which is where the
# `index` field in the enriched records further down comes from.
extra = pd.read_csv(get_path("airbnb-berlin-extra"))
extra.to_sql("extra", con=engine, if_exists="replace")

19859

In [5]:
# Preview a handful of columns from the `extra` table that was just written.
pd.read_sql(
    sql="SELECT id, amenities, accommodates, beds, bedrooms FROM extra",
    con=engine,
)

Unnamed: 0,id,amenities,accommodates,beds,bedrooms
0,1944,"[""Dishes and silverware"", ""Kitchen"", ""Oven"", ""...",1.0,1.0,1.0
1,2015,"[""Smoke alarm"", ""Cooking basics"", ""Lockbox"", ""...",3.0,0.0,1.0
2,3176,"[""Dedicated workspace"", ""Hangers"", ""Kitchen"", ...",4.0,2.0,1.0
3,3309,"[""Host greets you"", ""Washer"", ""Hangers"", ""Hot ...",1.0,1.0,1.0
4,6883,"[""Smoke alarm"", ""Cooking basics"", ""Dishes and ...",2.0,1.0,1.0
...,...,...,...,...,...
19854,48597066,"[""Dedicated workspace"", ""Hangers"", ""Private en...",2.0,1.0,1.0
19855,48599795,"[""Hot water kettle"", ""Dryer"", ""Hangers"", ""Hot ...",2.0,1.0,1.0
19856,48600069,"[""Air conditioning"", ""Hot water"", ""Elevator"", ...",1.0,1.0,1.0
19857,48602039,"[""Dedicated workspace"", ""Hangers"", ""Kitchen"", ...",1.0,,1.0


## Using JSON Data

In [5]:
# Source documents: the JSON records file exported earlier, read through
# moredata's parse_document parser.
data = JsonData(data_file="./data/airbnb-berlin.json", parser=parse_document)

# Connector pointing at the `extra` table.
# NOTE(review): presumably `dict_keys` names the record field that is matched
# against the table's `column`, and `result_attr` names where the match is
# stored — confirm against the SqlConnector documentation.
json_connector = SqlConnector(
    connection_url=URL,
    table_name="extra",
    column="id",
    result_attr="extra",
    dict_keys=["id"],
)
sql_enricher = Enricher(connector=json_connector)

data_enriched = sql_enricher.enrich(data)

# Persist the enriched records.
# NOTE(review): the third positional argument is passed through unchanged;
# presumably a chunk/batch size — confirm.
write_json_generator_to_json(
    "./data/airbnb-berlin-enriched",
    data_enriched,
    100000,
)

## Using GeoPandas

In [23]:
# Re-read the main listings and keep only rows that have both coordinates.
gdf = pd.read_csv(get_path("airbnb-berlin-main"))
has_coordinates = gdf["latitude"].notna() & gdf["longitude"].notna()
gdf = gdf.loc[has_coordinates]

# Wrap the first 100 listings in moredata's GeopandasData container.
airbnb = GeopandasData().from_geodataframe(gdf.iloc[:100])

# Same `extra` table as in the JSON example, but configured with `df_column`.
# NOTE(review): presumably `df_column` is the frame column matched against
# the table's `column` — confirm against the SqlConnector documentation.
geo_connector = SqlConnector(
    connection_url=URL,
    table_name="extra",
    column="id",
    result_attr="extra",
    df_column="id",
)
sql_enricher = Enricher(connector=geo_connector)

data_enriched = sql_enricher.enrich(airbnb)

In [24]:
import ast

# Every value in `enriched` is run through ast.literal_eval, i.e. the column
# is expected to hold Python-literal strings (dict reprs in the output below).
data_enriched["enriched"] = data_enriched["enriched"].map(ast.literal_eval)

# Series.to_dict() keys the outer dict by row label, so the frame built from
# it has one column per row; transpose to get one row per listing and one
# column per dict key.
enriched_columns = pd.DataFrame(data_enriched["enriched"].to_dict()).T

# Append the expanded columns next to the original ones.
data_enriched = pd.concat([data_enriched, enriched_columns], axis=1)

In [25]:
# Show the enriched listings, including the columns expanded from `enriched`.
data_enriched

Unnamed: 0,id,name,neighbourhood,neighbourhood_cleansed,room_type,price,latitude,longitude,enriched,index,id.1,amenities,accommodates,beds,bedrooms
0,1944,bright & airy Pberg/Mitte 3 months or more,"Berlin, Germany",Brunnenstr. Nord,Private room,$20.00,52.54425,13.39749,"{'index': 0, 'id': '1944', 'amenities': '[""Dis...",0,1944,"[""Dishes and silverware"", ""Kitchen"", ""Oven"", ""...",1.0,1.0,1.0
1,2015,Berlin-Mitte Value! Quiet courtyard/very central,"Berlin, Germany",Brunnenstr. Süd,Entire home/apt,$59.00,52.53454,13.40256,"{'index': 1, 'id': '2015', 'amenities': '[""Smo...",1,2015,"[""Smoke alarm"", ""Cooking basics"", ""Lockbox"", ""...",3.0,0.0,1.0
2,3176,Fabulous Flat in great Location,"Berlin, Germany",Prenzlauer Berg Südwest,Entire home/apt,$90.00,52.53500,13.41758,"{'index': 2, 'id': '3176', 'amenities': '[""Ded...",2,3176,"[""Dedicated workspace"", ""Hangers"", ""Kitchen"", ...",4.0,2.0,1.0
3,3309,BerlinSpot Schöneberg near KaDeWe,"Berlin, Germany",Schöneberg-Nord,Private room,$29.00,52.49885,13.34906,"{'index': 3, 'id': '3309', 'amenities': '[""Hos...",3,3309,"[""Host greets you"", ""Washer"", ""Hangers"", ""Hot ...",1.0,1.0,1.0
4,6883,Stylish East Side Loft in Center with AC & 2 b...,"Berlin, Germany",Frankfurter Allee Süd FK,Entire home/apt,$79.00,52.51171,13.45477,"{'index': 4, 'id': '6883', 'amenities': '[""Smo...",4,6883,"[""Smoke alarm"", ""Cooking basics"", ""Dishes and ...",2.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,174579,Berlin City Studio 4 mit W-Lan,,Alexanderplatz,Entire home/apt,$78.00,52.50905,13.41556,"{'index': 95, 'id': '174579', 'amenities': '[""...",95,174579,"[""Dishes and silverware"", ""Kitchen"", ""Oven"", ""...",3.0,3.0,2.0
96,175667,"calm, sunny and central apartment",,Schöneberg-Süd,Entire home/apt,$39.00,52.49138,13.34331,"{'index': 96, 'id': '175667', 'amenities': '[""...",96,175667,"[""Host greets you"", ""Kitchen"", ""Hangers"", ""Hot...",2.0,1.0,1.0
97,176342,11C Landmark/Checkpoint Charlie/120m2/2BATH/3BR,"Berlin, Germany",Südliche Friedrichstadt,Entire home/apt,$296.00,52.50254,13.39134,"{'index': 97, 'id': '176342', 'amenities': '[""...",97,176342,"[""Dishes and silverware"", ""Lockbox"", ""Kitchen""...",6.0,6.0,3.0
98,176355,11A Landmark/Checkpoint Charlie/110m2/2BATH/2BR,"Berlin, Germany",Südliche Friedrichstadt,Entire home/apt,$276.00,52.50254,13.39134,"{'index': 98, 'id': '176355', 'amenities': '[""...",98,176355,"[""Dishes and silverware"", ""Lockbox"", ""Kitchen""...",4.0,4.0,2.0
