In [None]:
import json
import pandas as pd
import re
import pyproj

In [None]:
# Read the source GeoJSON file into memory.
with open('WC_PRAHA.json', encoding='utf-8') as json_file:
    raw_text = json_file.read()
zdroj_data = json.loads(raw_text)

In [None]:
# Flatten each feature into one row:
# [object id, address, opening hours, price, x coordinate, y coordinate].
data_list = []

for feature in zdroj_data['features']:
    props = feature['properties']
    row_values = [props['OBJECTID'], props['ADRESA'], props['OTEVRENO'], props['CENA']]
    # The geometry holds exactly two coordinates; unpacking enforces that.
    x_coord, y_coord = feature["geometry"]["coordinates"]
    row_values += [x_coord, y_coord]
    data_list.append(row_values)


In [None]:
# Build the DataFrame from the collected rows.
df = pd.DataFrame(
    data=data_list,
    columns=[
        'OBJECTID',
        'ADRESA',
        'OTEVÍRACÍ DOBA',
        'CENA',
        'LONGITUDE',
        'LATITUDE',
    ],
)

In [None]:
# Preview the first five rows of the freshly built frame.
df.head(n=5)


Unnamed: 0,OBJECTID,ADRESA,OTEVÍRACÍ DOBA,CENA,LONGITUDE,LATITUDE
0,1,Libuňská 632/1,po-ne 6:30-17:30,jednotná cena 8 Kč (záloha 70Kč),-728795.406759,-1041664.0
1,2,Nad stanicí 42,po-pá 7:05-19:20; so-ne 8:05-16:50,zdarma,-748010.131796,-1041467.0
2,3,"Türkova 2317/5, Praha 4",nonstop,zdarma,-738463.580368,-1049301.0
3,4,"Chodovská 228/3, Praha 4",po-ne 8:00-01:00,zdarma,-739589.705782,-1047363.0
4,5,"Olomoucká 2332, Horní Počernice",po-ne 7:00-22:00,zdarma,-727107.112037,-1042838.0


In [None]:
# Křovák -> GPS conversion: define the source (EPSG:5514) and target
# (EPSG:4326) coordinate systems.
# NOTE(review): pyproj emits a warning for this `init=` form (see the cell
# output). It is kept because the conversion cell below expects results in
# (longitude, latitude) order, which is what the displayed output shows.
krovak, gps = (
    pyproj.Proj(init=epsg_code) for epsg_code in ("epsg:5514", "epsg:4326")
)

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [None]:
# Coordinate conversion helper.
# The transformer is built once here instead of calling the deprecated
# `pyproj.transform` for every row. `always_xy=True` pins the order to
# (x, y) in and (longitude, latitude) out, independent of how `krovak` and
# `gps` were constructed.
_krovak_to_gps = pyproj.Transformer.from_proj(krovak, gps, always_xy=True)


def trans_to_gps(row):
    """Convert one row's Křovák coordinates to GPS coordinates.

    Args:
        row: A mapping/Series with the keys "LONGITUDE" (x) and "LATITUDE" (y)
            expressed in the `krovak` coordinate system.

    Returns:
        pd.Series indexed by "LONGITUDE_GPS" and "LATITUDE_GPS".
    """
    longitude, latitude = _krovak_to_gps.transform(row["LONGITUDE"], row["LATITUDE"])
    return pd.Series([longitude, latitude], index=["LONGITUDE_GPS", "LATITUDE_GPS"])

In [None]:
# Convert every row and store the result in two new GPS columns.
converted = df.apply(trans_to_gps, axis="columns")
df[["LONGITUDE_GPS", "LATITUDE_GPS"]] = converted

  longitude, latitude = pyproj.transform(krovak, gps, row["LONGITUDE"], row["LATITUDE"])


In [None]:
# Preview the first five rows, now including the GPS columns.
df.head(n=5)

Unnamed: 0,OBJECTID,ADRESA,OTEVÍRACÍ DOBA,CENA,LONGITUDE,LATITUDE,LONGITUDE_GPS,LATITUDE_GPS
0,1,Libuňská 632/1,po-ne 6:30-17:30,jednotná cena 8 Kč (záloha 70Kč),-728795.406759,-1041664.0,14.612813,50.116499
1,2,Nad stanicí 42,po-pá 7:05-19:20; so-ne 8:05-16:50,zdarma,-748010.131796,-1041467.0,14.346302,50.094835
2,3,"Türkova 2317/5, Praha 4",nonstop,zdarma,-738463.580368,-1049301.0,14.493324,50.03676
3,4,"Chodovská 228/3, Praha 4",po-ne 8:00-01:00,zdarma,-739589.705782,-1047363.0,14.474079,50.052655
4,5,"Olomoucká 2332, Horní Počernice",po-ne 7:00-22:00,zdarma,-727107.112037,-1042838.0,14.638399,50.10807


In [None]:
# List the distinct opening-hours strings present in the data.
hours = df['OTEVÍRACÍ DOBA']
hours.unique()

array(['po-ne 6:30-17:30', 'po-pá 7:05-19:20; so-ne 8:05-16:50',
       'nonstop', 'po-ne 8:00-01:00', 'po-ne 7:00-22:00',
       'po-ne 6:50-17:45', 'po-ne 9:00-21:00', 'po-ne 9-21 h',
       'v době provozu školy-po dohodě',
       'po-pá 6:00-21:00; so-ne 8:00-20:00', 'po-ne 9:00- 21:00', None,
       'po-ne: 6:00-24:00', 'po a st 8:00-17:00', 'po-ne 9-23 h',
       'po-ne 6-24 h', 'po-pá 6:00-21:00; so-ne 7:00-21:00',
       'po-ne 9:00 - 21:00', 'po-pá 7-20 h; so-ne 8-19 h',
       'po-čt, ne 6:30-23:30; pá-so 6:30-24:00', 'po-ne 6:00-24:00',
       'po-pá 6:00-21:00; so-ne 8:00-21:00',
       'po-pá 6:00-21:00; so-ne 9:00-15:00', 'po-pá 6-21 h; so-ne 8-20 h',
       'po-ne 6:00-01:00', 'po-ne 05:00-23:30', 'po-pá 7:00-16:00',
       'po-so 8:00-20:00', 'po-pá 8:00-16:00', 'po-ne 6:00-21:00',
       'listopad prosinec leden únor 8:00-17:00,březen duben říjen+dušičky 8:00-18:00,květen-září 8:00-19:00',
       'po-so 9:00-24:00; ne 10:00-24:00', 'po-ne 05:00-23:00',
       'po-ne 10

In [None]:
# Rewrite every "nonstop" entry as an explicit whole-week, whole-day range.
for idx, record in df.iterrows():
    hours_text = record["OTEVÍRACÍ DOBA"]
    if hours_text is None or "nonstop" not in hours_text:
        continue
    df.at[idx, "OTEVÍRACÍ DOBA"] = "po-ne 0:00-24:00"

In [None]:
def uprav_oteviraci_dobu(oteviraci_doba):
    """Normalise an opening-hours string into a compact form.

    Steps, in order:
      1. ' - ' separators become '-'.
      2. An hour marker 'h' that follows a number becomes ':00'
         (e.g. '9-21 h' -> '9-21:00'). Letters 'h' inside ordinary words are
         left alone.
      3. All remaining spaces are removed.

    Example: 'po-ne 9-21 h' -> 'po-ne9-21:00'.

    Args:
        oteviraci_doba: Opening-hours text, or None/NaN when unknown.

    Returns:
        The normalised string, or None when the input is None/NaN.
    """
    if pd.isna(oteviraci_doba):
        return None
    compact = oteviraci_doba.replace(' - ', '-')
    # Only treat 'h' as an hour marker when it directly follows a digit
    # (optionally separated by whitespace) and ends a token.
    compact = re.sub(r'(?<=\d)\s*h\b', ':00', compact)
    return compact.replace(' ', '')

In [None]:
# Start with empty columns; each cell is filled at most once per row below.
df["PRACOVNÍ DNY"] = None
df["VÍKEND"] = None

for idx, record in df.iterrows():
    hours_text = record["OTEVÍRACÍ DOBA"]
    if not isinstance(hours_text, str):
        continue
    for segment in hours_text.split(";"):
        whole_week = "po-ne" in segment
        # Weekday hours: the first segment mentioning "po-pá" or "po-ne" wins.
        if ("po-pá" in segment or whole_week) and df.at[idx, "PRACOVNÍ DNY"] is None:
            df.at[idx, "PRACOVNÍ DNY"] = segment
        # A whole-week segment also supplies the weekend hours.
        if whole_week and df.at[idx, "VÍKEND"] is None:
            df.at[idx, "VÍKEND"] = segment

In [None]:
# Fill still-empty weekend cells from the first "so-ne" segment of each row.
for idx, record in df.iterrows():
    hours_text = record["OTEVÍRACÍ DOBA"]
    if not isinstance(hours_text, str):
        continue
    weekend_segments = [part for part in hours_text.split(";") if "so-ne" in part]
    if weekend_segments and pd.isna(df.at[idx, "VÍKEND"]):
        df.at[idx, "VÍKEND"] = weekend_segments[0]

In [None]:
def extrahuj_cas(oteviraci_doba):
    """Extract the opening and closing time from an opening-hours segment.

    The first "<time>-<time>" range in the text is used. A time is one or two
    digits, optionally followed by ':' or '.' and two minute digits. Spaces
    around the dash are tolerated ('9:00 - 21:00', '9:00- 21:00'), and
    hour-only values get ':00' appended ('9-21 h' -> '9:00', '21:00').

    Args:
        oteviraci_doba: Text such as 'po-ne 6:30-17:30', or None/NaN.

    Returns:
        Tuple (cas_od, cas_do) of strings, or (None, None) when the input is
        not a string or contains no time range.
    """
    if not isinstance(oteviraci_doba, str):
        return None, None

    found = re.search(
        r'(\d{1,2}(?:[:.]\d{2})?)\s*-\s*(\d{1,2}(?:[:.]\d{2})?)',
        oteviraci_doba,
    )
    if found is None:
        return None, None

    def with_minutes(value):
        # Hour-only values such as '9' become '9:00'; values that already
        # carry minutes (with ':' or '.') are returned untouched.
        return value if (':' in value or '.' in value) else value + ':00'

    cas_od, cas_do = (with_minutes(part) for part in found.groups())
    return cas_od, cas_do


# Derive the "from"/"to" columns for weekdays and weekends.
# Each entry: (target column, source column, position in the (from, to) tuple).
for target_column, source_column, position in (
    ('OTEVRENO_OD_PD', 'PRACOVNÍ DNY', 0),
    ('OTEVRENO_DO_PD', 'PRACOVNÍ DNY', 1),
    ('OTEVRENO_OD_VIKEND', 'VÍKEND', 0),
    ('OTEVRENO_DO_VIKEND', 'VÍKEND', 1),
):
    df[target_column] = df[source_column].apply(
        lambda segment, position=position: extrahuj_cas(segment)[position]
    )



Unnamed: 0,OBJECTID,ADRESA,OTEVÍRACÍ DOBA,CENA,LONGITUDE,LATITUDE,LONGITUDE_GPS,LATITUDE_GPS,PRACOVNÍ DNY,VÍKEND,OTEVRENO_OD_PD,OTEVRENO_DO_PD,OTEVRENO_OD_VIKEND,OTEVRENO_DO_VIKEND
0,1,Libuňská 632/1,po-ne 6:30-17:30,jednotná cena 8 Kč (záloha 70Kč),-728795.406759,-1041664.0,14.612813,50.116499,po-ne 6:30-17:30,po-ne 6:30-17:30,6:30,17:30,6:30,17:30
1,2,Nad stanicí 42,po-pá 7:05-19:20; so-ne 8:05-16:50,zdarma,-748010.131796,-1041467.0,14.346302,50.094835,po-pá 7:05-19:20,so-ne 8:05-16:50,7:05,19:20,8:05,16:50
2,3,"Türkova 2317/5, Praha 4",po-ne 0:00-24:00,zdarma,-738463.580368,-1049301.0,14.493324,50.03676,po-ne 0:00-24:00,po-ne 0:00-24:00,0:00,24:00,0:00,24:00
3,4,"Chodovská 228/3, Praha 4",po-ne 8:00-01:00,zdarma,-739589.705782,-1047363.0,14.474079,50.052655,po-ne 8:00-01:00,po-ne 8:00-01:00,8:00,01:00,8:00,01:00
4,5,"Olomoucká 2332, Horní Počernice",po-ne 7:00-22:00,zdarma,-727107.112037,-1042838.0,14.638399,50.10807,po-ne 7:00-22:00,po-ne 7:00-22:00,7:00,22:00,7:00,22:00


In [None]:
# Unify the time separator: turn '.' into ':' in all four time columns
# (presumably some entries are written like "9.00" - TODO confirm).
# regex=False is explicit because the default of `regex` differs between
# pandas versions, and as a regular expression '.' would match any character.
for time_column in (
    'OTEVRENO_OD_PD',
    'OTEVRENO_DO_PD',
    'OTEVRENO_OD_VIKEND',
    'OTEVRENO_DO_VIKEND',
):
    df[time_column] = df[time_column].str.replace('.', ':', regex=False)

In [None]:
# Add an empty 'BEZBAR' column; every row gets None.
# NOTE(review): presumably a placeholder for barrier-free access info - confirm.
df['BEZBAR'] = None

In [None]:
# Export the cleaned table as a list of records.
# Missing values may surface as float NaN, which json.dump would write as a
# bare `NaN` token - not valid JSON. Convert them to None so they are
# serialised as `null`.
data_dict = [
    {column: (None if pd.isna(value) else value) for column, value in record.items()}
    for record in df.to_dict(orient='records')
]

with open('praha_ciste.json', 'w', encoding='utf-8') as json_file:
    json.dump(data_dict, json_file, ensure_ascii=False)