In [None]:
import pandas as pd

## Main Format algorithm

In [None]:
def format(
        data_path:str,
        res_path:str,
        weight_name:str,
        class_name:str="class",
        geoposition_name:str="Геопозиция",
        normalize:bool=True
    ) -> None:
    """Convert a class-labelled CSV into a node table and save it as CSV.

    The output has an ``id`` index and the columns ``longitude``, ``latitude``,
    ``modularity_class`` and ``weight``; rows keep the order of the input file.

    Note: this function shadows the built-in ``format`` inside the notebook.

    Args:
        data_path: CSV to read; its first column is used as the row id.
        res_path: Destination path of the resulting CSV.
        weight_name: Name of the column holding the node weight.
        class_name: Name of the column holding the class label (cast to int).
        geoposition_name: Name of the column holding the position as a string
            ``<latitude>,<longitude>`` wrapped in one leading and one trailing
            character, which are stripped before parsing.
        normalize: When true, every weight is divided by the largest weight of
            its class; otherwise weights are written unchanged (as floats).

    Raises:
        ValueError: If a position does not split into exactly two
            comma-separated parts, or a value cannot be converted to a number.
    """
    data = pd.read_csv(data_path, index_col=0)

    # Keep the ids aside and work on a plain 0..n-1 index so that duplicated
    # ids cannot interfere with the alignment of the group-wise operations.
    ids = data.index.astype(int).rename("id")
    data = data.reset_index(drop=True)

    modularity_class = data[class_name].astype(int)
    weight = data[weight_name].astype(float)
    if normalize:
        # Group on the int-cast label, i.e. the same value that is written to
        # the output, so the maximum always belongs to the row's own class.
        max_in_class = weight.groupby(modularity_class).transform("max")
        weight = weight / max_in_class

    # Positions are stored latitude-first; the output lists longitude first.
    latitudes, longitudes = [], []
    for position in data[geoposition_name]:
        latitude, longitude = position[1:-1].split(",")
        latitudes.append(float(latitude))
        longitudes.append(float(longitude))

    # Build the whole table in one go instead of concatenating row by row.
    new_data = pd.DataFrame(
        {
            "longitude": longitudes,
            "latitude": latitudes,
            "modularity_class": modularity_class.to_numpy(),
            "weight": weight.to_numpy(),
        },
        index=ids,
    )

    new_data.to_csv(res_path)
    print(f"saved to {res_path}")

In [None]:
# Convert the legal-entities table, keeping the raw weights (normalize=False).
# NOTE(review): the result is written to "legal_entities.csv" in the current
# working directory rather than under ../data — confirm this is intended.
format("../data/result/Nodes (mod-new)20/legal_entities_with_class.csv", "legal_entities.csv", "Количество торговых объектов местного значения, ед.", normalize=False)

In [None]:
# Convert the residents table; normalize defaults to True, so each weight is
# divided by the largest weight of its class.
# NOTE(review): the output path has no ".csv" extension and reads like a
# directory name — confirm the intended destination.
format("../data/result/leiden_labels/Residents_with_class.csv", "../data/format/leiden_labels", "Кол-во жильцов")

### Concatenate residents and subways

In [None]:
# Load the formatted residents table; the first CSV column becomes the index.
residents = pd.read_csv("../data/format/Nodes (mod-new)20/residents_class.csv", index_col=0)
# Show the frame as the cell output.
residents

In [None]:
# Label-based lookup: "longitude" of the row whose index label is 0.
residents.loc[0, "longitude"]

In [None]:
# Positional lookup: index label of the first row.
residents.iloc[0].name

In [None]:
# Load the formatted subway table; the first CSV column becomes the index.
subways = pd.read_csv("../data/format/Nodes (mod-new)20/subway_class.csv", index_col=0)
# Show the frame as the cell output.
subways

In [None]:
# Stack residents on top of subways; ignore_index=True renumbers rows 0..n-1.
# NOTE(review): both frames were loaded with their first column as the index,
# so those index labels are discarded here — confirm they are not needed.
concat_data = pd.concat([residents, subways], ignore_index=True)
# Show the combined frame as the cell output.
concat_data

In [None]:
# Save the combined table; the renumbered index is written as the first column.
concat_data.to_csv("../data/format/Nodes (mod-new)20/concat_data.csv")

## Class analysis

In [None]:
# Per-class summary of the "residents" column: number of addresses, mean, max,
# the index labels of the addresses above 0.5, and their share within the class.
# NOTE(review): `classes` and `new_data` are not defined at notebook level in
# the cells visible here (they only appear as locals of `format`), so this cell
# seems to rely on existing kernel state — confirm where they come from.
class_res_columns = ["class", "count_addresses", "mean_res", "max_res", "count_more_than_0.5", "%"]

class_res_rows = []
for cl in classes:
    tmp = new_data.loc[new_data["modularity_class"] == cl]
    count_addresses = tmp["residents"].count()
    count_more_than_05 = tmp.loc[tmp["residents"] > 0.5].index.to_list()
    row = [cl,
           count_addresses,
           tmp["residents"].mean(),
           tmp["residents"].max(),
           count_more_than_05,
           len(count_more_than_05)/count_addresses]
    class_res_rows.append(dict(zip(class_res_columns, row)))

# Build the frame once instead of concatenating per class. The rows are
# reversed because each class used to be prepended, so the last class in
# `classes` ends up first.
class_res = pd.DataFrame(class_res_rows[::-1], columns=class_res_columns)

class_res

In [None]:
# Inspect the summary row of one specific class label (17021).
class_res.loc[class_res["class"] == 17021]

In [None]:
def sort_key(val):
    """Return the length of every element of ``val`` for use as a sort key.

    Intended to be passed as ``key=`` to ``sort_values`` on a column whose
    cells are sized containers (e.g. lists), so rows are ordered by size.
    """
    element_lengths = val.apply(len)
    return element_lengths
# Order classes by the length of their "count_more_than_0.5" list, longest first.
class_res.sort_values(by="count_more_than_0.5", key=sort_key, ascending=False)

In [None]:
# Order classes by the "%" column (share of addresses above 0.5), highest first.
class_res.sort_values(by="%", ascending=False)

In [None]:
# Classes whose "count_more_than_0.5" list holds exactly four entries.
class_res.loc[class_res["count_more_than_0.5"].apply(lambda x: len(x)) == 4]

## Formatting MFC data

In [None]:
# Load the raw MFC placement table (second CSV column becomes the "id" index),
# drop the helper columns and split the two-line "Координаты" cell into
# separate latitude / longitude columns.
data = (
    pd.read_csv("../data/raw data/Схема_размещения_МФЦ.csv", index_col=1)
    .rename_axis("id")
    # The "Тип офиса \n(МФЦ/ ЦОУ)*" column is kept (its drop is disabled).
    .drop(columns=["Unnamed: 0", "Наименование структурного подразделения"])
    .assign(
        # First line of the coordinates cell.
        latitude=lambda frame: frame["Координаты"].apply(lambda coor: coor.split("\n")[0]),
        # Second line of the coordinates cell.
        longitude=lambda frame: frame["Координаты"].apply(lambda coor: coor.split("\n")[1]),
    )
    .drop(columns="Координаты")
)
data

In [None]:
# Save the cleaned MFC table.
data.to_csv("../data/result/mfc.csv")

## OSM node format

In [None]:
# Load the commercial table; the first CSV column becomes the index.
# NOTE(review): this rebinds `data`, which previously held the MFC table.
data = pd.read_csv("../data/result/commercial.csv", index_col=0)
# Show the frame as the cell output.
data

In [None]:
# Load the raw non-residential objects table with the default integer index.
raw_data = pd.read_csv("../data/raw data/Нежилые_объекты.csv")
# Show the frame as the cell output.
raw_data

In [None]:
# Add a "street" column holding "<Дом>, <Улица>" for every row.
# NOTE(review): assumes both source columns are strings — confirm, since a
# numeric "Дом" column would make the concatenation fail.
raw_data["street"] = raw_data["Дом"] + ", " + raw_data["Улица"]