# Generate CSV file from JSON output from Open Street Map

This notebook takes a JSON file as input and creates a CSV file that can be loaded with pandas.

In [1]:
import numpy as np
import pandas as pd
import json

In [2]:
# Decode explicitly as UTF-8: the restaurant tags contain accented and CJK
# characters, and without `encoding` open() falls back to the platform's
# locale encoding, which is not UTF-8 everywhere.
with open("./data/parisian_restaurants.json", "r", encoding="utf-8") as f:
    json_file = json.load(f)

In [3]:
# The records to convert live under the top-level "elements" key of the export.
restaurants = json_file["elements"]

In [4]:
# One list per output column; entry i of every list describes restaurant i.
names, vegan, vegetarian, speciality, lat, long = [], [], [], [], [], []

for restaurant in restaurants:
    tags = restaurant["tags"]
    # A restaurant without a "name" tag gets an empty name.
    names.append(tags.get("name", ""))
    # Diet flags are 1 only when the tag is present and exactly "yes".
    vegan.append(int(tags.get("diet:vegan") == "yes"))
    vegetarian.append(int(tags.get("diet:vegetarian") == "yes"))
    # "na" marks a restaurant without a "cuisine" tag.
    speciality.append(tags.get("cuisine", "na"))
    lat.append(restaurant["lat"])
    long.append(restaurant["lon"])

In [5]:
# Assemble the per-restaurant lists into one table; the dict order fixes the
# column order.
df = pd.DataFrame(
    {
        "name": names,
        "speciality": speciality,
        "vegan": vegan,
        "vegetarian": vegetarian,
        "lat": lat,
        "long": long,
    }
)

In [6]:
import re

# A "speciality" cell may hold several tags separated by "," or ";".
# Flatten them with a plain comprehension: unlike np.concatenate this also
# works when the table is empty, and it avoids a round-trip through NumPy.
a = [
    tag.lower().replace(" ", "").replace("_", "")  # normalised spelling
    for cell in df["speciality"]
    for tag in re.split(",|;", cell)
]
# Sorted array of the distinct normalised tags found in the data.
b = np.unique(a)

In [7]:
# Synonym groups for cuisine tags. Each inner list gathers spellings that are
# treated as the same speciality; create_dict() maps every entry of a group to
# the group's LAST element, which therefore acts as the canonical label.
# Entries are written in the normalised form produced above: lower case, with
# spaces and underscores removed.
# NOTE(review): "cambodgienne" is listed twice in the first group.
# NOTE(review): "italian_pizza" still contains an underscore, so it can never
# match a normalised tag ("italianpizza" in the same group covers it).
d = [["africaine", "cambodgienne", "côted'ivoire", "congo", "cambodgienne", "senegalese", "ivorycoast", "southafrican", "sénégal", 'north-african', 'northafrican', "ivorian", "capvert", "ethiopian", "laotian", "westafrican", "african"],
     ["couscous", "morocan", "algerian", 'oriental', 'orientalcouscous', "tunisian", 'marocain', 'marocco', "tunisiane", "moroccan", "maghrébine", "maghreb"],
    ["américaine", "burger", "oyster", "southamericanbarbecueandnaturalwine", "newyorkpizza", "american"],
    ["latinamerican", "latino"],
    ["crepe", "crepes", "galettes", "crêperie", "breton"], 
    ["français", "cuisinenissarde", "hairdresser", "française", 'alsatian', "frenchtraditional", "sud-ouestfrance", "auvergne", "repaslesmidis", "new-french", "french"],
    ['traditionnel', 'traditionnal', "smarttraditional", "traditional"],
    ["fait-maison", "homemade"],
    ["gastronomiefrançaise", "frenchgastronomy", "françaisegastronomique", "gastr", "gastronomie", "gastronomique", "gastronomic"],
    ["hawaii", "hawaiian"],
    ["bistro", "brasserie", "bar", "cafe", "coffee", "coffeeshop", "bistrotgastronome", "bistrot"],
    ["argentine", "argentinian", "peruvian", "southamerican"],
    ["asianfusion", "asian"],
    ["brazilian", "bresilian"],
    ["caribbean", "créole", "creole"], 
    ["corsica", "corsican"], 
    ["chinese-fonduepékinoise", "bobun", "phở", "cantonese", "chineses(teochew)-中国潮州", "火鍋", "huoguo", 'lanzhou', 'lao', 'shandong', "yunnan", 'shanxi', 'sichuan', "jiangxi", "chinese"], 
    ["deli", "delicatessen"], 
    ["libanais", "lebanese", "libanese"],
    ["pizza", "italian_pizza", "italianpizza", "pizzasàemporter", "pasta", "pâtes", "italian"],
    ["francoportugaise", "portuguese"],
    ["japaneseandchinese", "japonais", "japonaise", "sushi", "japanese"],
    ["pokebowl", "bowl"],
    ["friture", "fries", "friedfood"],
    ["barbecue", "rotisserie", "grill"],
    ["indianpakistanese", "pakistan", "pakistanaisindien", "pakistani", "océanindien", "pakistanese", "indian"], 
    ['vietnam', 'vietnamese', 'vietnamien-cambodgien', 'vietnamien'],
    ["smoothie", "juice", "smoothies", "cocktails"],
    ['thaï', 'thaifruitjuces', 'thai'],
    ["world", "international"],
    ["baràvins", "vin", "vins", "wine"], 
    ["colombian", "columbian"],
    ["diet:vegetarian=only", "vegetarian"], 
    ["maisonbioproducteursbols", "bio"],
    ['tibet', 'tibetan'],
    ['salad', 'saladbar', 'salade', 'salades', 'salads'], 
    ["spécialitésréunionaises", "reunionisland"],
    ["bavarian", "allemand", "flammkuchen", "deutch"],
    ['kurdish', 'kurde'],
    ["salondethé", "bubbletea", "tea"],
    ['mauricius', 'mauritian'],
    ['persan', 'persian'],
    ["savorypancakes", "pancake"],
    ['russe', 'russian'],
    ["jewish", "kosher"],
    ['mexico', 'mexican'],
    ["poisson", "fish"], 
    ["turque", "turkish"], 
    ["mozzarella", "cheese"],
    ["quebec", "canada"],
    ['noodle', 'noodles'],
    ["finedining", "diner"],
    ['bagel', 'bagels'],
    ["mésopotamieetanatolie", "middleeastern", "iranian"]]
     

In [8]:
def create_dict(specialities, same_spec):
    """Map every speciality to the canonical label of its synonym group.

    Parameters
    ----------
    specialities : iterable of str
        Speciality tags to translate.
    same_spec : iterable of sequences of str
        Synonym groups. The last element of a group is the canonical label
        used for every member of that group.

    Returns
    -------
    dict
        ``{speciality: canonical_label}``. A speciality that appears in no
        group maps to itself. If a speciality appears in several groups, the
        group listed last wins.
    """
    # Flatten the groups once into an alias -> label lookup instead of
    # re-scanning every group for every speciality. Later groups overwrite
    # earlier ones, which keeps the "last matching group wins" outcome.
    canonical = {}
    for row in same_spec:
        for alias in row:
            canonical[alias] = row[-1]
    return {sp: canonical.get(sp, sp) for sp in specialities}

In [9]:
# Lookup from every tag found in the data to its canonical speciality label.
sp_dict = create_dict(specialities=b, same_spec=d)

In [13]:
# Rewrite each restaurant's tags as a comma-separated list of canonical labels.
r = []
for sp in df["speciality"]:
    # Same normalisation as the one used to build sp_dict's keys.
    raw_specs = re.split(",|;", sp.lower().replace(" ", "").replace("_", ""))
    # De-duplicate, then sort: the iteration order of a set of strings changes
    # between interpreter sessions (string hash randomisation), which made the
    # exported column differ from one run of the notebook to the next.
    canonical_specs = sorted({sp_dict[s] for s in raw_specs})
    r.append(",".join(canonical_specs))

In [14]:
# Overwrite the raw tags with the canonical, comma-separated labels built above
# (in-place change to df).
df["speciality"] = r

In [16]:
# Persist the cleaned table; the row index is written too, as the first
# (unnamed) column of the file.
df.to_csv(path_or_buf="./data/restaurants.csv", index=True)

In [18]:
# Legacy NumPy generator with a fixed seed, so the draws below are repeatable.
r = np.random.RandomState(seed=2020)

In [19]:
# Display the generator's internal state tuple.
r.get_state()

('MT19937', array([      2020, 1803034869, 2259915846, 4245545687, 3903451560,
        2098060156, 2102951255, 2220259573, 4288342139, 3117281121,
        3541069081,   60028237, 3368090733, 3392959859, 2596488510,
        3283840187,  522738856, 1293579865, 4046309578, 4043491424,
        3523938851, 2376474293, 3556445513, 2435766345, 3880286383,
        3839862773, 1557785128, 4083700552, 3351838643, 2700218509,
        3527806857,  414907281, 3747404117,  155895055, 1700644365,
        2807008991, 3118637653, 3282803576,  759573165, 2420140904,
         181148410, 2982964939, 1617970807, 3610964665, 2436713870,
        1250133097, 1184294198,  313777634,  199392346, 1130183091,
         595985772, 1924853711, 3300383354,   96662258, 1451973552,
        3873695244, 2893073187, 3743141182, 1064869451, 2405013202,
        1606275148, 2289525662, 1152174282, 2972506966, 1465810020,
         194781466,   33945988, 2172589399, 2329995213, 3513924592,
        3955995173, 4038958917, 14757

In [20]:
# Draw one sample from the generator; this advances its state.
r.uniform(low=0, high=1)

0.9862768288615988

In [22]:
# Snapshot the current generator state so it can be restored with set_state().
s = r.get_state()

In [31]:
# Draw once, then rewind the generator to the snapshot `s`, so the next draw
# made from that state repeats the value printed here.
print(r.uniform(low=0, high=1))
r.set_state(state=s)

0.8733919458206546
