In [2]:
import json
import pandas as pd
import re
import pyproj

In [3]:
with open('WC_PRAHA.json', encoding='utf-8') as json_file:
    zdroj_data = json.load(json_file)

In [4]:
data_list = []

for feature in zdroj_data['features']:
    object_id = feature['properties']['OBJECTID']
    address = feature['properties']['ADRESA']
    opening_hours = feature['properties']['OTEVRENO']
    price = feature['properties']['CENA']
    longitude, latitude = feature["geometry"]["coordinates"]
    
    data_list.append([object_id, address, opening_hours, price, longitude, latitude])


In [5]:

df = pd.DataFrame(data_list, columns=['OBJECTID', 'ADRESA', 'OTEVÍRACÍ DOBA', 'CENA', 'LONGITUDE', 'LATITUDE'])

In [6]:

krovak = pyproj.Proj(init="epsg:5514")
gps = pyproj.Proj(init="epsg:4326")

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [7]:

def trans_to_gps(row):
    longitude, latitude = pyproj.transform(krovak, gps, row["LONGITUDE"], row["LATITUDE"])
    return pd.Series([longitude, latitude], index=["LONGITUDE_GPS", "LATITUDE_GPS"])

In [8]:
df[["LONGITUDE_GPS", "LATITUDE_GPS"]] = df.apply(trans_to_gps, axis=1)

  longitude, latitude = pyproj.transform(krovak, gps, row["LONGITUDE"], row["LATITUDE"])


In [9]:
hours = df['OTEVÍRACÍ DOBA']
(hours.unique())

array(['po-ne 6:30-17:30', 'po-pá 7:05-19:20; so-ne 8:05-16:50',
       'nonstop', 'po-ne 8:00-01:00', 'po-ne 7:00-22:00',
       'po-ne 6:50-17:45', 'po-ne 9:00-21:00', 'po-ne 9-21 h',
       'v době provozu školy-po dohodě',
       'po-pá 6:00-21:00; so-ne 8:00-20:00', 'po-ne 9:00- 21:00', None,
       'po-ne: 6:00-24:00', 'po a st 8:00-17:00', 'po-ne 9-23 h',
       'po-ne 6-24 h', 'po-pá 6:00-21:00; so-ne 7:00-21:00',
       'po-ne 9:00 - 21:00', 'po-pá 7-20 h; so-ne 8-19 h',
       'po-čt, ne 6:30-23:30; pá-so 6:30-24:00', 'po-ne 6:00-24:00',
       'po-pá 6:00-21:00; so-ne 8:00-21:00',
       'po-pá 6:00-21:00; so-ne 9:00-15:00', 'po-pá 6-21 h; so-ne 8-20 h',
       'po-ne 6:00-01:00', 'po-ne 05:00-23:30', 'po-pá 7:00-16:00',
       'po-so 8:00-20:00', 'po-pá 8:00-16:00', 'po-ne 6:00-21:00',
       'listopad prosinec leden únor 8:00-17:00,březen duben říjen+dušičky 8:00-18:00,květen-září 8:00-19:00',
       'po-so 9:00-24:00; ne 10:00-24:00', 'po-ne 05:00-23:00',
       'po-ne 10

In [10]:
for index, row in df.iterrows():
    opening_time = row["OTEVÍRACÍ DOBA"]
    if opening_time is not None and "nonstop" in opening_time:
        df.at[index, "OTEVÍRACÍ DOBA"] = "po-ne 0:00-24:00"

In [11]:
def uprav_oteviraci_dobu(oteviraci_doba):
    if oteviraci_doba is not None and pd.notna(oteviraci_doba):
        oteviraci_doba = oteviraci_doba.replace(' - ', '-').replace('h', ':00').replace(' ', '')
        if any(form in oteviraci_doba for form in ['9-23h', '9-21h', '6-24h']):
            cas = oteviraci_doba.split(' ')[-1]
            return cas
        else:
            return oteviraci_doba
    else:
        return None

In [12]:
df["PRACOVNÍ DNY"] = None
df["VÍKEND"] = None

for index, row in df.iterrows():
    opening_time = row["OTEVÍRACÍ DOBA"]
    if isinstance(opening_time, str): 
        time_ranges = opening_time.split(";")
        for time_range in time_ranges:
            if "po-pá" in time_range:
                if df.at[index, "PRACOVNÍ DNY"] is None:
                    df.at[index, "PRACOVNÍ DNY"] = time_range
            if "po-ne" in time_range:
                if df.at[index, "PRACOVNÍ DNY"] is None:
                    df.at[index, "PRACOVNÍ DNY"] = time_range
                if df.at[index, "VÍKEND"] is None:
                    df.at[index, "VÍKEND"] = time_range

In [13]:
for index, row in df.iterrows():
    opening_time = row["OTEVÍRACÍ DOBA"]
    if isinstance(opening_time, str):  
        time_ranges = opening_time.split(";")
        for time_range in time_ranges:
            if "so-ne" in time_range:
                if pd.isna(df.at[index, "VÍKEND"]):  
                    df.at[index, "VÍKEND"] = time_range

In [14]:
def extrahuj_cas(oteviraci_doba):
    if oteviraci_doba is not None and pd.notna(oteviraci_doba):
        if 'h' in oteviraci_doba:
            cas = oteviraci_doba.replace('h', ':00').replace(' ', '')
        else:
            cas = oteviraci_doba
        cas_od_do = cas.split(' ')[-1].split('-')
        cas_od = cas_od_do[0] if len(cas_od_do) > 1 else None
        cas_do = cas_od_do[1] if len(cas_od_do) > 1 else None
        return cas_od, cas_do
    else:
        return None, None


df['OTEVRENO_OD_PD'] = df['PRACOVNÍ DNY'].apply(lambda x: extrahuj_cas(x)[0])
df['OTEVRENO_DO_PD'] = df['PRACOVNÍ DNY'].apply(lambda x: extrahuj_cas(x)[1])

df['OTEVRENO_OD_VIKEND'] = df['VÍKEND'].apply(lambda x: extrahuj_cas(x)[0])
df['OTEVRENO_DO_VIKEND'] = df['VÍKEND'].apply(lambda x: extrahuj_cas(x)[1])



In [15]:
df['OTEVRENO_OD_PD'] = df['OTEVRENO_OD_PD'].str.replace('.', ':')
df['OTEVRENO_DO_PD'] = df['OTEVRENO_DO_PD'].str.replace('.', ':')
df['OTEVRENO_OD_VIKEND'] = df['OTEVRENO_OD_VIKEND'].str.replace('.', ':')
df['OTEVRENO_DO_VIKEND'] = df['OTEVRENO_DO_VIKEND'].str.replace('.', ':')

In [16]:
df['BEZBAR'] = None

In [17]:
data_dict = df.to_dict(orient='records')

with open('praha_ciste.json', 'w', encoding='utf-8') as json_file:
    json.dump(data_dict, json_file, ensure_ascii=False)