In [423]:
import xmltodict
import requests
import re

def get_dataset_ids(list_of_table_names):
    r = requests.get("https://osp-rs.stat.gov.lt/rest_xml/dataflow/")
    metadata_dict = dict(xmltodict.parse(r.text))
    dataset_ids = []
    
    for i in metadata_dict["mes:Structure"]["mes:Structures"]["str:Dataflows"]["str:Dataflow"]:
        matches = []
        for j in list_of_table_names:
            j = j.lower()
            j = re.sub(r'[^\w\s]','',j)
            j = re.sub(' +', ' ',j)
            name = i["com:Name"][0]["#text"].lower()
            name = re.sub(r'[^\w\s]','',name)
            name = re.sub(' +', ' ',name)
            matches.append(j in name)
        if any(matches):
            dataset_ids.append(i["@id"])
            
    return dataset_ids

def construct_links(dataset_ids,start_period="2015-01"):
    links = []
    link_beginning = "https://osp-rs.stat.gov.lt/rest_json/data/"

    for i in dataset_ids:
        links.append(link_beginning + i + "/?startPeriod=" + start_period)
    
    return links

In [1097]:
import json
import pandas as pd

miestai = ["Kauno m. sav.","Vilniaus m. sav.","Klaipėdos m. sav.","Šiaulių m. sav.","Panevežio m. sav.","Alytaus m. sav."]

def dataset_to_dataframe(link):
    r = requests.get(link)
    json_dict = r.json()
    skip = False
    
    if not json_dict["dataSets"][0]["observations"]:
        skip = True
    for i in json_dict["structure"]["attributes"]["dataSet"]:
        if i["id"] == "DS_REGIONAL":
            if i["values"][0]["id"]=="N":
                skip = True
      #  if i["id"] == "DS_TIME_FORMAT":
      #      if i["values"][0]["name"] not in ["Metai","Mokymo metai"]:
      #          skip = True
    
    if not skip:
        dimensions_dict = {}

        for i in json_dict["structure"]["dimensions"]["observation"]:
            dimensions_dict[i["name"]] = i["keyPosition"]

            
      #  if list(dimensions_dict.keys())[0] != "Administracinė teritorija":
       #     return "This dataset has an unusual spatial dimension"
        list_of_mappings = []

        for i in json_dict["structure"]["dimensions"]["observation"]:
            mapping_dict = {}
            for ind,j in enumerate(i["values"]):
                mapping_dict[str(ind)] = j["name"]
            list_of_mappings.append(mapping_dict)   

        rows = []

        for i in json_dict["dataSets"][0]["observations"].keys():
            rows.append(i.split(":"))

        df = pd.DataFrame(rows)
        for i, j in enumerate(list_of_mappings):
            df[i].replace(j,inplace=True)

        df.columns = dimensions_dict.keys()

        
        values = []

        for i in json_dict["dataSets"][0]["observations"].values():
            values.append(i[0])
        
        df["Reikšmė"]=pd.Series(values)

        df=df[df["Administracinė teritorija"].isin(miestai)]
        
        df.name = json_dict["structure"]["name"]
        
        return df
    else:
        print("Skipped")

In [1575]:
table_names = ["Perdirbtos komunalinės atliekos",
            "Teršalų, išmestų į aplinkos orą iš stacionarių taršos šaltinių","Ūkio, buities ir gamybos nuotekų išleidimas į paviršinius vandenis",
              "Vandens sunaudojimas","Autobusų rida","Vidutiniškai vienam gyventojui tenka kelionių autobusais"]

table_ids=get_dataset_ids(table_names)

table_ids

['S1R078_M8010302',
 'S1R079_M8010301',
 'S5R003_M7030327_3',
 'S5R003_M7030211_3',
 'S5R087_M7030320_1',
 'S1R121_M9070335_1',
 'S1R121_M9070335_2',
 'S1R124_M9070338_1']

In [1577]:
links=construct_links(table_ids)

df_list = []
for i in links:
    df_list.append(dataset_to_dataframe(i))

Skipped
Skipped
Skipped


In [1561]:
table_names = ["Nuolatinių gyventojų skaičius liepos 1 d."]
table_ids=get_dataset_ids(table_names)

links=construct_links(table_ids)

population_df = dataset_to_dataframe(links[0])

In [1562]:
population_df = population_df[population_df.iloc[:,1] == "Miestas ir kaimas"].iloc[:,[0,3,4]]

In [1829]:
df_list2 = [df_list[0],df_list[1],df_list[2],df_list[4],df_list[6]]

In [1830]:
df_list3 = []
for i in df_list2:
    x = pd.merge(i,population_df,on=["Laikotarpis","Administracinė teritorija"])
    x["per_thousand"]= x["Reikšmė_x"]/x["Reikšmė_y"]*1000
    df_list3.append(x)

In [1831]:
for i,j in enumerate(df_list3):
    if i != 3:
        j.drop(["Matavimo vienetai","Reikšmė_x","Reikšmė_y"],axis=1,inplace=True)
        j.rename(columns={"Administracinė teritorija": "city","Laikotarpis":"x","per_thousand":"y"},inplace=True)

In [1810]:
df_list3[3] = df_list3[3][df_list3[3].iloc[:,1]=="Autobusai"]

df_list3[3].drop(columns=["Matavimo vienetai","Reikšmė_y","per_thousand","Transporto rūšis (Autobusai)"],inplace=True)
df_list3[3] = df_list3[3].replace("Klaipėdos m. sav.", "Klaipėda").replace("Kauno m. sav.", "Kaunas").replace("Vilniaus m. sav.", "Vilnius").replace("Panevėžio m. sav.", "Panevėžys").replace("Šiaulių m. sav.", "Šiauliai").replace("Alytaus m. sav.", "Alytus").round(1)
df_list3[3].rename(columns={"Administracinė teritorija": "city","Laikotarpis":"x","Reikšmė_x":"y"},inplace=True)

In [1816]:
df_list3[3].to_csv("kelioniu_kiekis.csv",index=False)

In [1827]:
grouped=df_list3[4][df_list3[4]["Teršalai"].isin(["Azoto oksidai, tonos","Anglies monoksidas, tonos","Kietosios medžiagos"])].round(1).groupby("Teršalai")

l=[grouped.get_group(x) for x in grouped.groups]
l=[i.iloc[:,[0,2,3]].replace("Klaipėdos m. sav.", "Klaipėda").replace("Kauno m. sav.", "Kaunas").replace("Vilniaus m. sav.", "Vilnius").replace("Panevėžio m. sav.", "Panevėžys").replace("Šiaulių m. sav.", "Šiauliai").replace("Alytaus m. sav.", "Alytus") for i in l]

In [1823]:
l[0].to_csv("tersalai_co.csv",index=False)
l[1].to_csv("tersalai_no.csv",index=False)
l[2].to_csv("tersalai_kietosios.csv",index=False)

In [1833]:
grouped=df_list3[1][df_list3[1].iloc[:,0].isin(["Sunaudota vandens energetikos reikmėms","Sunaudota vandens ūkio ir buities reikmėms"])].round(1).groupby("Vandens naudojimo paskirtis")

l=[grouped.get_group(x) for x in grouped.groups]
l=[i.iloc[:,[1,2,3]].replace("Klaipėdos m. sav.", "Klaipėda").replace("Kauno m. sav.", "Kaunas").replace("Vilniaus m. sav.", "Vilnius").replace("Panevėžio m. sav.", "Panevėžys").replace("Šiaulių m. sav.", "Šiauliai").replace("Alytaus m. sav.", "Alytus") for i in l]

In [1838]:
l[1].to_csv("vandens_sunaudojimas_buiciai.csv",index=False)
l[0].to_csv("vandens_sunaudojimas_energetikai.csv",index=False)

In [1822]:
df_list3[2][df_list3[2]["Reiso tipas"]=="Reguliarus reisas"].iloc[:,[1,2,3]].round(1).replace("Klaipėdos m. sav.", "Klaipėda").replace("Kauno m. sav.", "Kaunas").replace("Vilniaus m. sav.", "Vilnius").replace("Panevėžio m. sav.", "Panevėžys").replace("Šiaulių m. sav.", "Šiauliai").replace("Alytaus m. sav.", "Alytus").to_csv("viesojo_rida.csv",index=False)

In [487]:
import os

files = os.listdir("kpi")

files = [i for i in files if i not in  ["indicators_scores.csv","categories.csv","categories_ranks.csv","kpi.csv","total.csv","siuksles.csv","oras.csv","transportas.csv","vanduo.csv"]]
files_main = [i.split(".")[0] for i in files]

In [488]:
df = pd.read_csv("kpi/"+files[0]).rename(columns={"y":files_main[0]})
for i,j in enumerate(files[1:]):
    df = pd.merge(df,pd.read_csv("kpi/"+j),on=["x","city"],how="outer").rename(columns={"y":files_main[i+1]})

In [489]:
trans = [2,10]
water = [3,8,9]
air = [4,5,6]
trash = [7]

categories = [trans,water,air,trash]

In [490]:
df.iloc[:,[2,3,10]] =  -1 * df.iloc[:,[2,3,10]]

In [491]:
grouped = df.groupby("x")

l=[grouped.get_group(x) for x in grouped.groups]

In [492]:
df_list = []
df_ranks_list = []
for df in l:
    df_min_max = df.iloc[:,2:]
    df.iloc[:,2:] = abs(((df_min_max-df_min_max.min())/(df_min_max.max()-df_min_max.min()))-1)
    for j,i in enumerate(categories):
        df.iloc[:,i] = df.iloc[:,i] * (25/len(i))
    #    df["cat"+str(j)]=df.copy().groupby(['city'], sort=False).apply(lambda x: x.ffill()).iloc[:,categories[j]].sum(axis=1)
    #df.rename(columns={"cat0":"transportas","cat1":"vanduo","cat2":"oras","cat3":"šiukšles"},inplace=True)
    #df["total"] = df.iloc[:,10:10+len(categories)].sum(axis=1)
    #indices = [0,1,11,12,13,14,15]
    #df= df.iloc[:,indices]
    df=df.round(1)
    df_ranks = df.copy()
    df_ranks.iloc[:,2:] = df.iloc[:,2:].rank(ascending=False).rename(columns = lambda x: x+"_rank")
    df_ranks["city"]=df["city"]
    df_ranks_list.append(df_ranks)
    df_list.append(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, val, pi)


In [486]:
pd.concat(df_list).to_csv("categories.csv",index=False)
pd.concat(df_ranks_list).to_csv("categories_ranks.csv",index=False)

In [493]:
pd.concat(df_list).to_csv("indicators_scores.csv",index=False)

In [362]:
import numpy as np
waste = pd.read_csv("urb_cenv_1_Data.csv",encoding = "ISO-8859-1",na_values=":")

In [363]:
miestai = ["Klaipeda","Siauliai","Vilnius","Kaunas","Panevezys","Alytus"]

In [364]:
waste = waste[waste["CITIES"].isin(miestai)][(waste["TIME"]>=2015) & (waste["TIME"]<2020)].iloc[:,[0,1,3]]

  waste = waste[waste["CITIES"].isin(miestai)][(waste["TIME"]>=2015) & (waste["TIME"]<2020)].iloc[:,[0,1,3]]


In [365]:
l = []
for i in waste["Value"]:
    l.append(float(i))

waste["Value"] = l

In [366]:
waste.rename(columns={"TIME":"x","CITIES":"city","Value":"y"},inplace=True)
waste.replace({"Panevezys":"Panevėžys","Siauliai":"Šiauliai","Klaipeda":"Klaipėda"},inplace=True)

In [367]:
population_df = population_df.replace("Klaipėdos m. sav.", "Klaipėda").replace("Kauno m. sav.", "Kaunas").replace("Vilniaus m. sav.", "Vilnius").replace("Panevėžio m. sav.", "Panevėžys").replace("Šiaulių m. sav.", "Šiauliai").replace("Alytaus m. sav.", "Alytus")
population_df.rename(columns={"Administracinė teritorija":"city","Laikotarpis":"x","Reikšmė":"y"},inplace=True)
population_df["x"]=pd.to_numeric(population_df["x"])

In [368]:
waste= pd.merge(waste,population_df,on=["x","city"])
waste["y"]= waste["y_x"]/waste["y_y"]*1000

waste = waste.loc[:,["x","city","y"]]
#waste['y'] = waste.groupby(['city'], sort=False)['y'].apply(lambda x: x.ffill())
waste.dropna().round(1).to_csv("kpi/siuksles_surinktos.csv",index=False)