In [161]:
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [162]:
# Change some pandas display options
pd.set_option("display.max_rows", 10000)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_colwidth", 0)
pd.set_option("display.width", 2000)
pd.set_option("styler.format.precision", 5)


# Functions


In [163]:
def normalize_json(data):
    spaces_details = pd.json_normalize(data, record_path=["available_dates"], meta=["id"], errors="ignore")

    # Change columns type
    spaces_details["start"] = spaces_details["start"].astype("datetime64[ns]")
    spaces_details["end"] = spaces_details["end"].astype("datetime64[ns]")
    spaces_details["cancellable"] = spaces_details["cancellable"].astype(int)

    # Split date from time
    spaces_details["date"] = spaces_details["start"].dt.to_period("d").astype("datetime64[ns]")
    # spaces_details["date"] = spaces_details["date"].astype("str")

    return spaces_details


def encode_time_span(spaces_details: pd.DataFrame):
    def encode(row: pd.Series):
        if row["end"].hour == 23:
            end = str(row["end"].hour)
        else:
            end = str(row["end"].hour - 1)
        start = str(row["start"].hour)

        row.loc[start:end] = 1

        return row

    zeros_matrix = np.zeros(shape=(spaces_details.shape[0], 24), dtype="int")

    zeros_matrix = pd.DataFrame(zeros_matrix, columns=[str(i) for i in range(24)])

    spaces_details = pd.concat([spaces_details, zeros_matrix], axis=1)
    return spaces_details.apply(encode, axis=1)


def add_total_time_span(spaces_details: pd.DataFrame):
    # Get available time span for each date
    # spaces_details["time_span"] = (spaces_details["end"] - spaces_details["start"]).astype("timedelta64[h]")
    spaces_details["total_time_span"] = spaces_details.loc[:, "0":"23"].sum(axis=1)

    # Correct time span to be 24 for whole day
    # spaces_details["time_span"] = spaces_details["time_span"].apply(lambda x: x + 1 if x == 23 else x)
    return spaces_details


def add_cancellable_span(spaces_details: pd.DataFrame):
    # Add new cancellable percentage column
    spaces_details["cancellable_span"] = spaces_details["cancellable"] * spaces_details["total_time_span"]

    return spaces_details


def add_num_of_spaces(spaces_details):
    spaces_details["num_spaces"] = 1
    return spaces_details


# Read json data


In [164]:
with open("../data/raw/spaces_dummy_data_v2.json", mode="r") as f:
    data = json.load(f)


# Normalize data


In [165]:
spaces_details = normalize_json(data)
spaces_details = encode_time_span(spaces_details)
spaces_details = add_total_time_span(spaces_details)
spaces_details = add_cancellable_span(spaces_details)
# spaces_details = add_num_of_spaces(spaces_details)


In [166]:
spaces_details_styled = spaces_details.style.applymap(lambda x: "color:darkorange" if x == 1 else None)
spaces_details_styled


Unnamed: 0,start,end,cancellable,id,date,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,total_time_span,cancellable_span
0,2022-01-01 08:00:00,2022-01-01 16:00:00,1,A1,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,8,8
1,2022-01-01 08:00:00,2022-01-01 16:00:00,0,A2,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,8,0
2,2022-01-01 14:00:00,2022-01-01 16:00:00,1,A3,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,2,2
3,2022-01-01 10:00:00,2022-01-01 13:00:00,0,A4,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,3,0
4,2022-01-01 08:00:00,2022-01-01 14:00:00,0,A5,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,6,0
5,2022-01-01 11:00:00,2022-01-01 16:00:00,0,A6,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,5,0
6,2022-01-01 08:00:00,2022-01-01 12:00:00,0,A7,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,4,0
7,2022-01-01 08:00:00,2022-01-01 11:00:00,1,A8,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3
8,2022-01-01 08:00:00,2022-01-01 13:00:00,1,A9,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,5,5
9,2022-01-01 15:00:00,2022-01-01 16:00:00,0,A10,2022-01-01 00:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0


In [167]:
# individual_spaces = spaces_details.groupby(["id"]).sum()[["total_time_span", "cancellable_span",]]
# individual_spaces = individual_spaces.sort_values("total_time_span", ascending=False)
# unique_spaces = individual_spaces.index
# individual_spaces


# Second step

- Create combinations from available spaces


In [168]:
spaces_details_group = spaces_details.groupby(["id", "date"]).sum(numeric_only=True)
spaces_details_group


Unnamed: 0_level_0,Unnamed: 1_level_0,cancellable,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,total_time_span,cancellable_span
id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
A1,2022-01-01,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,8,8
A10,2022-01-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
A2,2022-01-01,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,8,0
A3,2022-01-01,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,2,2
A4,2022-01-01,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,3,0
A5,2022-01-01,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,6,0
A6,2022-01-01,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,5,0
A7,2022-01-01,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,4,0
A8,2022-01-01,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3
A9,2022-01-01,1,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,5,5


In [169]:
import itertools

unique_spaces = spaces_details["id"].unique().tolist()
combinations_lst = list(
    itertools.chain.from_iterable(itertools.combinations(unique_spaces, r) for r in range(2, len(unique_spaces) + 1))
)
combinations_lst


[('A1', 'A2'),
 ('A1', 'A3'),
 ('A1', 'A4'),
 ('A1', 'A5'),
 ('A1', 'A6'),
 ('A1', 'A7'),
 ('A1', 'A8'),
 ('A1', 'A9'),
 ('A1', 'A10'),
 ('A2', 'A3'),
 ('A2', 'A4'),
 ('A2', 'A5'),
 ('A2', 'A6'),
 ('A2', 'A7'),
 ('A2', 'A8'),
 ('A2', 'A9'),
 ('A2', 'A10'),
 ('A3', 'A4'),
 ('A3', 'A5'),
 ('A3', 'A6'),
 ('A3', 'A7'),
 ('A3', 'A8'),
 ('A3', 'A9'),
 ('A3', 'A10'),
 ('A4', 'A5'),
 ('A4', 'A6'),
 ('A4', 'A7'),
 ('A4', 'A8'),
 ('A4', 'A9'),
 ('A4', 'A10'),
 ('A5', 'A6'),
 ('A5', 'A7'),
 ('A5', 'A8'),
 ('A5', 'A9'),
 ('A5', 'A10'),
 ('A6', 'A7'),
 ('A6', 'A8'),
 ('A6', 'A9'),
 ('A6', 'A10'),
 ('A7', 'A8'),
 ('A7', 'A9'),
 ('A7', 'A10'),
 ('A8', 'A9'),
 ('A8', 'A10'),
 ('A9', 'A10'),
 ('A1', 'A2', 'A3'),
 ('A1', 'A2', 'A4'),
 ('A1', 'A2', 'A5'),
 ('A1', 'A2', 'A6'),
 ('A1', 'A2', 'A7'),
 ('A1', 'A2', 'A8'),
 ('A1', 'A2', 'A9'),
 ('A1', 'A2', 'A10'),
 ('A1', 'A3', 'A4'),
 ('A1', 'A3', 'A5'),
 ('A1', 'A3', 'A6'),
 ('A1', 'A3', 'A7'),
 ('A1', 'A3', 'A8'),
 ('A1', 'A3', 'A9'),
 ('A1', 'A3', 'A10'),

In [170]:
def bitwise_or(x: pd.Series):
    if x.name in [f"{i}" for i in range(24)]:
        return np.bitwise_or.reduce(x)


new = spaces_details.copy()

new.query("id in @combinations_lst[0]").groupby("date").sum()


  new.query("id in @combinations_lst[0]").groupby("date").sum()


Unnamed: 0_level_0,cancellable,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,total_time_span,cancellable_span
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2022-01-01,1,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,16,8


In [171]:

for comb in combinations_lst:
    
    df_comb = spaces_details.query("id in @comb").groupby("date").sum(numeric_only=True)
    # Edit only 0->23 columns to be 1
    df_comb.loc[:, "0":"23"][(df_comb.loc[:, "0":"23"] > 1)] = 1

    df_comb = pd.concat({"".join(comb): df_comb}, names=["id"])
    spaces_details_group = pd.concat([spaces_details_group, df_comb], axis=0)
    

spaces_details_group[(spaces_details_group > 1)] = 1

spaces_details_group["total_time_span"] = spaces_details_group.loc[:, "0":"23"].sum(axis=1)

spaces_details_group["cancellable_span"] *= spaces_details_group["cancellable"]
        
# df_styled = spaces_details_group.style.applymap(lambda x: "color:darkorange" if x == 1 else None, subset=list(map(str, range(0, 24))))
# # df.reset_index(level=1, inplace=True)
# # df['date'] = df['date'].astype(str)
# # df.set_index('date', append=True, inplace=True)
# df_styled


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_comb.loc[:, "0":"23"][(df_comb.loc[:, "0":"23"] > 1)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_comb.loc[:, "0":"23"][(df_comb.loc[:, "0":"23"] > 1)] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_comb.loc[:, "0":"23"][(df_comb.loc[:, "0":"23"] > 1)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the cave

In [172]:

spaces_details_group = spaces_details_group.groupby("id")[["total_time_span", "cancellable_span"]].sum()
spaces_details_group["cancellable_percent"] = (
        spaces_details_group["cancellable_span"] / spaces_details_group["cancellable_span"].sum()
    )


spaces_details_group["num_spaces"] = spaces_details_group.index.str.split(pat=r"\d", regex=True)
spaces_details_group["num_spaces"] = spaces_details_group["num_spaces"].apply(len) - 1
        
spaces_details_group["distance"] = (spaces_details_group["total_time_span"] - 100).abs()
spaces_details_group.drop(columns=["total_time_span"], inplace=True)

decisions_variables = ["cancellable_percent", "num_spaces", "distance"]
spaces_details_group = spaces_details_group[decisions_variables]

In [173]:
import numpy as np
import pandas as pd


class Topsis:
    decision_matrix = np.array([])  # Matrix
    combinations = []
    weighted_normalized_decision_matrix = np.array([])  # Weight matrix
    normalized_decision_matrix = np.array([])  # Normalisation matrix
    M = 0  # Number of rows
    N = 0  # Number of columns

    """
	Create an evaluation matrix consisting of m alternatives and n criteria,
	with the intersection of each alternative and criteria given as {\displaystyle x_{ij}}x_{ij},
	we therefore have a matrix {\displaystyle (x_{ij})_{m\times n}}(x_{{ij}})_{{m\times n}}.
	"""

    def __init__(self, decision_matrix: np.ndarray, weight_matrix: list[int], criteria: list[bool]):
        self.dmatrix = decision_matrix

        # M×N matrix
        self.decision_matrix = np.array(decision_matrix, dtype="float")

        # M alternatives (options)
        self.row_size = len(self.decision_matrix)

        # N attributes (criteria)
        self.column_size = len(self.decision_matrix[0])

        # N size weight matrix
        self.weight_matrix = np.array(weight_matrix, dtype="float")
        self.weight_matrix = self.weight_matrix / sum(self.weight_matrix)
        self.criteria = np.array(criteria, dtype="float")

    """
	# Step 2
	The matrix {\displaystyle (x_{ij})_{m\times n}}(x_{{ij}})_{{m\times n}} is then normalised to form the matrix
	"""

    def step_2(self):
        # normalized scores
        self.normalized_decision_matrix = self.decision_matrix.copy()

        sqrd_sum = np.power(self.normalized_decision_matrix, 2).sum(axis=0)

        self.normalized_decision_matrix = self.normalized_decision_matrix / (sqrd_sum**0.5)

    """
	# Step 3
	Calculate the weighted normalized decision matrix
	"""

    def step_3(self):
        self.weighted_normalized_decision_matrix = self.normalized_decision_matrix.copy()

        self.weighted_normalized_decision_matrix = self.weight_matrix * self.weighted_normalized_decision_matrix

    """
	# Step 4
	Determine the worst alternative {\displaystyle (A_{w})}(A_{w}) and the best alternative {\displaystyle (A_{b})}(A_{b}):
	"""

    def step_4(self):
        self.worst_alternatives = np.zeros(self.column_size)
        # self.best_alternatives = np.zeros(self.column_size)

        self.best_alternatives = np.array([0, 0, 0])  # set custom best alternatives

        for i in range(self.column_size):
            if self.criteria[i]:
                self.worst_alternatives[i] = min(self.weighted_normalized_decision_matrix[:, i])
                # self.best_alternatives[i] = max(self.weighted_normalized_decision_matrix[:, i])
            else:
                self.worst_alternatives[i] = max(self.weighted_normalized_decision_matrix[:, i])
                # self.best_alternatives[i] = min(self.weighted_normalized_decision_matrix[:, i])

    """
	# Step 5
	Calculate the L2-distance between the target alternative {\displaystyle i}i and the worst condition {\displaystyle A_{w}}A_{w}
	{\displaystyle d_{iw}={\sqrt {\sum _{j=1}^{n}(t_{ij}-t_{wj})^{2}}},\quad i=1,2,\ldots ,m,}
	and the distance between the alternative {\displaystyle i}i and the best condition {\displaystyle A_{b}}A_b
	{\displaystyle d_{ib}={\sqrt {\sum _{j=1}^{n}(t_{ij}-t_{bj})^{2}}},\quad i=1,2,\ldots ,m}
	where {\displaystyle d_{iw}}d_{{iw}} and {\displaystyle d_{ib}}d_{{ib}} are L2-norm distances
	from the target alternative {\displaystyle i}i to the worst and best conditions, respectively.
	"""

    def step_5(self):
        self.worst_distance = np.zeros(self.row_size)
        self.best_distance = np.zeros(self.row_size)

        self.worst_distance_mat = self.weighted_normalized_decision_matrix.copy()
        self.best_distance_mat = self.weighted_normalized_decision_matrix.copy()

        self.worst_distance = ((self.worst_distance_mat - self.worst_alternatives) ** 2).sum(axis=1) ** 0.5
        self.best_distance = ((self.best_distance_mat - self.best_alternatives) ** 2).sum(axis=1) ** 0.5

    """
	# Step 6
	Calculate the similarity
	"""

    def step_6(self):
        np.seterr(all="ignore")
        self.worst_similarity = np.zeros(self.row_size)
        self.best_similarity = np.zeros(self.row_size)
        self.performance_score = np.zeros(self.row_size)

        self.worst_similarity = self.worst_distance / (self.worst_distance + self.best_distance)
        self.best_distance = self.worst_distance / (self.worst_distance + self.best_distance)

    def rank_to_worst_similarity(self):
        return [ind + 1 for ind, val in enumerate(self.worst_similarity.argsort())]

    def rank_to_best_similarity(self):
        return [val + 1 for val in self.best_similarity.argsort()]

    def calc(self):
        # print("Step 1\n", self.decision_matrix, end="\n\n")
        self.step_2()
        # print("Step 2\n", self.normalized_decision_matrix, end="\n\n")
        self.step_3()
        # print("Step 3\n", self.weighted_normalized_decision_matrix, end="\n\n")
        self.step_4()
        # print("Step 4\n", self.worst_alternatives, self.best_alternatives, end="\n\n")
        self.step_5()
        # print("Step 5\n", self.worst_distance, self.best_distance, end="\n\n")
        self.step_6()
        # print("Step 6\n", self.worst_similarity, self.best_similarity, end="\n\n")

    def get_rank(self):
        self.calc()
        scores = pd.DataFrame(
            {
                "performance": self.worst_similarity,
            },
            index=self.dmatrix.index,
        )
        # Append additional previous details
        norm_dmatrix = pd.DataFrame(
            self.weighted_normalized_decision_matrix,
            columns=["norm_cancellables", "norm_space", "norm_distance"],
            index=self.dmatrix.index,
        )
        scores = pd.concat([scores, self.dmatrix, norm_dmatrix], axis=1)

        scores = scores.sort_values(by="performance", ascending=False)

        # Adding ranking column to score df
        scores.insert(0, "rank", range(1, len(self.worst_similarity) + 1))

        # Save the score results to csv file
        # scores.to_csv(settings.COMB_SCORES_PATH)

        return scores


In [176]:

Topsis(spaces_details_group, weight_matrix=[69.2, 23.1, 7.7], criteria=[False, False, False]).get_rank()

Unnamed: 0_level_0,rank,performance,cancellable_percent,num_spaces,distance,norm_cancellables,norm_space,norm_distance
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A2,1,0.904327,0.0,1,92,0.0,0.001247,0.002402
A5,2,0.902844,0.0,1,94,0.0,0.001247,0.002454
A6,3,0.902102,0.0,1,95,0.0,0.001247,0.00248
A7,4,0.901359,0.0,1,96,0.0,0.001247,0.002506
A4,5,0.900617,0.0,1,97,0.0,0.001247,0.002532
A5A6,6,0.878327,0.0,2,92,0.0,0.002494,0.002402
A2A4,7,0.878327,0.0,2,92,0.0,0.002494,0.002402
A6A7,8,0.878327,0.0,2,92,0.0,0.002494,0.002402
A2A7,9,0.878327,0.0,2,92,0.0,0.002494,0.002402
A2A6,10,0.878327,0.0,2,92,0.0,0.002494,0.002402


In [None]:
# Convert multiindex dataframe to json
# data = {level: df.xs(level).to_dict("index") for level in df.index.levels[0]}
# D = spaces_details_group.groupby(level=0).apply(lambda df: df.xs(df.name).to_dict()).to_dict()

# with open("./jsdf.json", mode="w") as f:
#     json.dump(data, f, indent=4)


In [None]:
# Get total time span
combinations_time_span = df.reset_index().groupby("id")[["total_time_span", "cancellable_percent"]].sum()
combinations_time_span["cancellable_percent"] *= 100
fig, ax = plt.subplots(2, 1, figsize=(10, 7))

sns.set_style("ticks", {"axes.grid": True})
sns.barplot(x=combinations_time_span.index, y="total_time_span", data=combinations_time_span, palette="Set2", ax=ax[0])
sns.barplot(
    x=combinations_time_span.index, y="cancellable_percent", data=combinations_time_span, palette="Set2", ax=ax[1]
)
sns.despine(bottom=True, left=True)


ax[0].bar_label(ax[0].containers[-1], fmt="Time:\n%.2f", label_type="edge")
ax[1].bar_label(ax[1].containers[-1], fmt="Cancellable:\n%.2f", label_type="edge")
ax[0].set(ylabel="Total time span [h]")
ax[1].set(ylabel="Cancellable percentage")
fig.tight_layout()


NameError: name 'df' is not defined