In [None]:
# Cell 1: Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import datetime
from sklearn import preprocessing
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import pandas as pd, numpy as np, datetime, re

In [None]:
def convert_to_liters(capacity):
    """Convert a bottle-size label such as '75CL', '750 ml' or '1.5 LTR' to litres.

    Args:
        capacity: Raw capacity value; it is stringified, stripped and
            upper-cased before parsing.

    Returns:
        The capacity in litres as a float, or '' when the text carries no
        recognised unit marker (CL, ML, LITRE, LTR, L) or no parseable number.
    """
    capacity = str(capacity).strip().upper()

    # Every supported unit contains an 'L'; values without any unit marker are
    # not treated as capacities at all.
    if not any(unit in capacity for unit in ('CL', 'ML', 'LITRE', 'LTR', 'L')):
        return ''

    digits = re.sub(r'[^\d.]', '', capacity)
    try:
        amount = float(digits)
    except ValueError:
        # Text such as 'BOTTLE' contains an 'L' but no usable number; report
        # it as "no capacity" instead of crashing the whole column conversion.
        return ''

    if 'CL' in capacity:
        return amount / 100
    if 'ML' in capacity:
        return amount / 1000
    return amount

def preprocess_data(df):
    """Clean and scale the raw wine table without modifying the caller's frame.

    Steps, each applied only when the column is present:
      * 'Price', 'ABV', 'Capacity': strip everything except digits and dots,
        convert to numbers and min-max scale to [0, 1] (rounded to 3 decimals).
      * 'Style' (split on '&') and 'Characteristics' (split on ','): expand the
        items into numbered columns ('Style 1', 'Characteristic 1', ...) and
        drop the original column.
      * 'Vintage': 'NV' becomes the current year, otherwise the first 4-digit
        number is used; the result is scaled so the current year maps to 0 and
        the oldest vintage after 1900 maps to 1 (negative values clamp to 0).

    Args:
        df: Raw wine DataFrame.

    Returns:
        A new, processed DataFrame. An empty input is returned unchanged.
    """
    if df.empty:
        return df

    # Work on a copy: the column assignments below would otherwise leak into
    # the frame the caller still holds.
    df = df.copy()

    # Process numeric columns
    for col in ['Price', 'ABV', 'Capacity']:
        if col in df.columns:
            df[col] = pd.to_numeric(
                df[col].apply(lambda x: re.sub(r'[^\d.]', '', str(x)).strip() or np.nan),
                errors='coerce',
            )
            # Scaling an all-NaN column is pointless, so it is left untouched.
            if df[col].notnull().any():
                df[col] = MinMaxScaler().fit_transform(df[[col]]).round(3)

    # Process Style and Characteristics columns
    for field in ['Style', 'Characteristics']:
        if field in df.columns:
            separator = '&' if field == 'Style' else ','
            df[field] = (
                df[field]
                .str.replace(r'[^\w\s&,]', '', regex=True)
                .str.split(separator)
                .apply(lambda x: [item.strip() for item in x] if isinstance(x, list) else x)
            )
            max_len = df[field].apply(lambda x: len(x) if isinstance(x, list) else 0).max()
            for i in range(1, max_len + 1):
                # rstrip("s") turns 'Characteristics' into 'Characteristic'.
                df[f'{field.rstrip("s")} {i}'] = df[field].apply(
                    lambda x: x[i-1] if isinstance(x, list) and len(x) >= i else ''
                )
            df = df.drop(columns=[field])

    # Process Vintage
    if 'Vintage' in df.columns:
        current_year = datetime.datetime.now().year

        def _parse_vintage(value):
            text = str(value)
            if text.strip().upper() == 'NV':
                return current_year
            match = re.search(r'\d{4}', text)
            return int(match.group(0)) if match else np.nan

        df['Vintage'] = df['Vintage'].apply(_parse_vintage)
        valid_years = df['Vintage'][df['Vintage'] > 1900]
        if not valid_years.empty:
            # Hoisted out of the per-row lambda: it is the same for every row.
            span = valid_years.min() - current_year
            if span == 0:
                # Every valid vintage is the current year, so the scale is
                # degenerate; map known values to 0 instead of dividing by zero.
                df['Vintage'] = df['Vintage'].apply(lambda x: 0.0 if pd.notna(x) else np.nan)
            else:
                df['Vintage'] = df['Vintage'].apply(
                    lambda x: max(0, (x - current_year) / span) if pd.notna(x) else np.nan
                ).round(2)

    return df

# Load the raw catalogue, run the cleaning pipeline and persist the result.
df = pd.read_csv('../datasets/WineDataset.csv')
df_cleaned = df.pipe(preprocess_data)
df_cleaned.to_csv('../datasets/cleaned_wines.csv', index=False)
# Last expression of the cell: preview of the cleaned data.
df_cleaned.head(n=5)

In [None]:
import pandas as pd

# Source files: file1 supplies the 'Ratings' column, file2 is the base table.
file1 = "../datasets/updated_wines.csv"
file2 = "../datasets/merged_wine_dataset.csv"

df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# Left join on (WineName, WineryName): every row of df2 is kept and gains the
# rating found in df1, or NaN when df1 has no matching wine.
merged_df = pd.merge(
    df2,
    df1[['WineName', 'WineryName', 'Ratings']],
    on=['WineName', 'WineryName'],
    how='left',
)

# Persist the merged dataset
output_file = "../datasets/PLNTD_dataset.csv"
merged_df.to_csv(output_file, index=False)

print(f"PLNTD_dataset created and saved to {output_file}")

# Sanity check: report rows that did not receive a rating from the join
missing_ratings = merged_df.loc[merged_df['Ratings'].isna()]

if missing_ratings.empty:
    print("SUCCESS: All rows have a rating.")
else:
    print("WARNING: Some rows in the dataset are missing a rating.")
    print(missing_ratings)

# Data preparation and visualization

In this cell, an initial data preparation is performed: a copy of the original DataFrame is created and specific columns are selected.

Two types of criteria are defined:

- Benefit criteria: characteristics where higher values are better
- Cost criteria: characteristics where lower values are better (such as price)

Encoding is performed to convert categorical variables into numerical ones:

- Acidity: scale from 1 to 3
- Body of wine: scale from 1 to 5

Numerical columns are cleaned by removing symbols and text to convert them to float type:

- ABV: removes "ABV" and the "%" symbol
- Price: removes "£" and "per bottle"

A density plot (KDE plot) is created for each column before normalization to visualize the data distribution.

In [None]:
# Keep a descriptive copy (criteria plus wine/winery names) of the merged data;
# merged_df itself is reduced to the numeric criteria further down.
copy_df = merged_df.copy()
columns_for_copy = ['ABV', 'Ratings', 'Body', 'Acidity', 'Price', 'WineName', 'WineryName']
copy_df = copy_df[columns_for_copy]


# Benefit criteria: higher is better. Cost criteria: lower is better.
benefit_criteria = ['ABV', 'Ratings', 'Body', 'Acidity']
cost_criteria = ['Price']
print("Benefit Criteria:", benefit_criteria)
print("Cost Criteria:", cost_criteria)

# Convert categorical 'Acidity' and 'Body' to numeric values (encoding)
acidity_mapping = {'Low': 1, 'Medium': 2, 'High': 3}
body_mapping = {'Very light-bodied': 1, 'Light-bodied': 2, 'Medium-bodied': 3, 'Full-bodied': 4, 'Very full-bodied': 5}

# Direct dict lookups: any label missing from the mapping (including NaN)
# raises KeyError.
# NOTE(review): these assignments overwrite merged_df in place, so this cell is
# not re-runnable -- a second run would look up the already-encoded integers in
# the string-keyed mappings and fail.
merged_df['Acidity'] = [acidity_mapping[val] for val in merged_df['Acidity']]
merged_df['Body'] = [body_mapping[val] for val in merged_df['Body']]

# Strip the textual decoration ('ABV ', '%', '£', 'per bottle') and convert to float
merged_df['ABV'] = merged_df['ABV'].str.replace('ABV ', '').str.replace('%', '').astype(float)
merged_df['Price'] = merged_df['Price'].str.replace('£', '').str.replace('per bottle', '').astype(float)

# From here on merged_df holds only the decision criteria
columns_to_keep = benefit_criteria + cost_criteria
merged_df = merged_df[columns_to_keep]

# One density plot per criterion, before any normalization
for column in merged_df.columns:
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    sns.kdeplot(merged_df[column])
    plt.title(f'Before normalization - {column}')
    plt.tight_layout()
    plt.show()

print(merged_df)

# Normalization methods

Each normalization method has its advantages, and by using all four we can compare the results against each other and see if any method works better for the specific data

- Min-Max Normalization: Rescales the data so that its values fall within the range [0, 1] using the minimum and maximum values of the dataset
- Linear Max Normalization: Divides each variable value by the maximum value of that variable, scaling the data to the range [0, 1].
- Logarithmic Normalization: Applies a logarithmic transformation to the data.
- Vector Normalization: Transforms the original values into relative proportions, considering the total magnitude of the data, and allows comparing different criteria on a common scale.

In [None]:
import pandas as pd
import numpy as np

def normalize_dataset_with_min_max(df, benefit_criteria, cost_criteria):
    """
    Min-Max normalization of the decision criteria.

    For benefit criteria:
    nij = (rij - rmin) / (rmax - rmin)

    For cost criteria:
    nij = (rmax - rij) / (rmax - rmin)

    A criterion whose values are all identical (rmax == rmin) carries no
    information; it is mapped to 0 instead of producing 0/0 = NaN.

    Args:
        df: DataFrame with data
        benefit_criteria: List of benefit criteria (higher is better)
        cost_criteria: List of cost criteria (lower is better)

    Returns:
        Normalized DataFrame (the input is not modified)
    """
    # Create a copy of DataFrame to avoid modifying the original
    normalized_df = df.copy()

    def _rescale(column, invert):
        rmin = column.min()
        rmax = column.max()
        span = rmax - rmin
        if span == 0:
            # Constant column: multiplying by 0.0 yields 0.0 for every value
            # while keeping missing entries as NaN.
            return column * 0.0
        numerator = (rmax - column) if invert else (column - rmin)
        return numerator / span

    # Normalize benefit criteria (Min-Max)
    for criteria in benefit_criteria:
        normalized_df[criteria] = _rescale(df[criteria], invert=False)

    # Normalize cost criteria (Inverted Min-Max)
    for criteria in cost_criteria:
        normalized_df[criteria] = _rescale(df[criteria], invert=True)

    return normalized_df

def normalize_dataset_with_linear_max(df, benefit_criteria, cost_criteria):
    """
    Linear Max normalization: every criterion is expressed relative to its
    column maximum.

    Benefit criteria:  nij = rij / rmax
    Cost criteria:     nij = 1 - (rij / rmax)

    Args:
        df: DataFrame with data
        benefit_criteria: List of benefit criteria
        cost_criteria: List of cost criteria

    Returns:
        Normalized DataFrame (the input frame is left untouched)
    """
    scaled = df.copy()

    # Higher-is-better columns: share of the column maximum
    for name in benefit_criteria:
        column = df[name]
        scaled[name] = column.div(column.max())

    # Lower-is-better columns: complement of the share of the column maximum
    for name in cost_criteria:
        column = df[name]
        scaled[name] = 1 - column.div(column.max())

    return scaled

def normalize_dataset_with_vector(df, benefit_criteria, cost_criteria):
    """
    Vector normalization: each criterion is divided by the Euclidean norm of
    its column.

    Benefit criteria:
    g'_j(a_i) = g_j(a_i) / sqrt(sum([g_j(a_k)]^2))

    Cost criteria (complemented so that a lower raw value scores higher):
    g'_j(a_i) = 1 - g_j(a_i) / sqrt(sum([g_j(a_k)]^2))

    Args:
        df: DataFrame with data
        benefit_criteria: List of benefit criteria
        cost_criteria: List of cost criteria

    Returns:
        Normalized DataFrame (the input frame is left untouched)
    """
    scaled = df.copy()

    def _column_norm(column):
        # Square root of the sum of squared column values
        return np.sqrt(np.sum(column ** 2))

    for name in benefit_criteria:
        scaled[name] = df[name] / _column_norm(df[name])

    for name in cost_criteria:
        share_of_norm = df[name] / _column_norm(df[name])
        scaled[name] = 1 - share_of_norm

    return scaled

def normalize_dataset_with_logarithmic(df, benefit_criteria, cost_criteria):
    """
    Logarithmic normalization.

    Benefit criteria:
    g'_j(a_i) = log(g_j(a_i)) / log(max(g_j))

    Cost criteria:
    g'_j(a_i) = log(min(g_j)) / log(g_j(a_i))

    Machine epsilon is added to every logarithm argument so that a value of
    exactly 0 does not produce log(0).

    Args:
        df: DataFrame with data
        benefit_criteria: List of benefit criteria
        cost_criteria: List of cost criteria

    Returns:
        Normalized DataFrame (the input frame is left untouched)
    """
    eps = np.finfo(float).eps
    scaled = df.copy()

    # Benefit: log of the value relative to the log of the column maximum
    for name in benefit_criteria:
        column = df[name]
        log_of_max = np.log(column.max() + eps)
        scaled[name] = np.log(column + eps) / log_of_max

    # Cost: log of the column minimum relative to the log of the value
    for name in cost_criteria:
        column = df[name]
        log_of_min = np.log(column.min() + eps)
        scaled[name] = log_of_min / np.log(column + eps)

    return scaled

# Run all four normalization techniques on the same criteria table
normalized_data_min_max = normalize_dataset_with_min_max(
    df=merged_df, benefit_criteria=benefit_criteria, cost_criteria=cost_criteria
)
normalized_data_linear_max = normalize_dataset_with_linear_max(
    df=merged_df, benefit_criteria=benefit_criteria, cost_criteria=cost_criteria
)
normalized_data_vector = normalize_dataset_with_vector(
    df=merged_df, benefit_criteria=benefit_criteria, cost_criteria=cost_criteria
)
normalized_data_logarithmic = normalize_dataset_with_logarithmic(
    df=merged_df, benefit_criteria=benefit_criteria, cost_criteria=cost_criteria
)


# Visualization of normalized data

For each normalization method:

- Show the first rows of the normalized dataset
- Calculate and present the minimum and maximum values for each column
- Provide a complete view of the normalized dataset

By visualizing the datasets, it is possible to compare the different normalization techniques and identify possible anomalies or patterns

It is interesting to note that in the case of columns that were designated as cost criteria, their data distribution appears inverted when compared to the distribution visualized previously before normalization. The remaining distributions maintain the same shape as before being normalized

In [None]:
# Pair every normalized dataset with the label used in the printed output and
# in the plot titles
normalized_datasets = list(zip(
    (
        normalized_data_min_max,
        normalized_data_linear_max,
        normalized_data_vector,
        normalized_data_logarithmic,
    ),
    ("Min-Max", "Linear Max", "Vector", "Logarithmic"),
))

# Report each normalization method in turn: preview, value range per column,
# density plots, then the full table
for dataset, method_name in normalized_datasets:
    print(f"\nFirst rows of normalized data ({method_name}):")
    print(dataset.head())

    print(f"\nMinimum and maximum values after normalization ({method_name}):")
    for column in dataset.columns:
        values = dataset[column]
        print(f"{column}:")
        print(f"  Min: {values.min():.3f}")
        print(f"  Max: {values.max():.3f}")

    # One density plot per column of the normalized dataset
    for column in dataset.columns:
        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        sns.kdeplot(dataset[column])
        plt.title(f'{method_name} normalization - {column}')
        plt.tight_layout()
        plt.show()

    print(f"\nComplete dataset ({method_name}):")
    print(dataset)
    print("\n" + "="*50 + "\n")  # Separator between different methods

# Evaluation Metrics for Normalizations

Three different metrics are implemented to evaluate the quality and characteristics of the normalizations performed.

- Standard Deviation: Measures the dispersion of normalized data and allows evaluation of variability in each column after normalization
- Minkowski Distance: Calculates the average distance between all pairs of observations and evaluates how normalization affects spatial relationships between observations
- Mean Squared Error (MSE): Quantifies the difference between original and normalized data. Provides a measure of the magnitude of change.

In [None]:
import numpy as np

def calculate_standard_deviation(normalized_df):
    """Return the standard deviation of every column as a Series indexed by column name."""
    per_column_std = normalized_df.std()
    return per_column_std

def calculate_minkowski_distance(normalized_df, p=2):
    """Return the mean Minkowski distance over all pairs of rows.

    Args:
        normalized_df: DataFrame of numeric values, one row per observation.
        p: Order of the Minkowski norm (2 = Euclidean, 1 = Manhattan).

    Returns:
        Mean of the pairwise distances, or NaN when there are fewer than two
        rows (no pair exists).
    """
    from scipy.spatial.distance import pdist

    if len(normalized_df) < 2:
        return np.nan

    # pdist evaluates every unordered row pair (i < j) in compiled code, which
    # replaces the quadratic Python loop over DataFrame rows.
    pairwise = pdist(normalized_df.to_numpy(dtype=float), metric='minkowski', p=p)
    return pairwise.mean()  # Mean distance

def calculate_mean_squared_error(normalized_df, original_df):
    """Return the mean squared difference between normalized and original data.

    The squared differences are averaged per column first, then the column
    means are averaged into a single number.
    """
    squared_error = (normalized_df - original_df).pow(2)
    column_means = squared_error.mean()
    return column_means.mean()


In [None]:
# Evaluate every normalization method with the three metrics, keyed by method name
results = {
    method_name: {
        'standard_deviation': calculate_standard_deviation(dataset),
        'minkowski_distance': calculate_minkowski_distance(dataset),
        'mean_squared_error': calculate_mean_squared_error(dataset, merged_df),
    }
    for dataset, method_name in normalized_datasets
}

# Detailed report per method
for method_name, metrics in results.items():
    print(f"\nResults for {method_name}:")
    print("Standard Deviation:")
    print(metrics['standard_deviation'])
    print("\nAverage Minkowski Distance:")
    print(f"{metrics['minkowski_distance']:.4f}")
    print("\nMean Squared Error:")
    print(f"{metrics['mean_squared_error']:.4f}")
    print("\n" + "="*50)

# Summary table: one row per method; the per-column standard deviations are
# collapsed into their mean so each metric is a single number
import pandas as pd

comparison_data = [
    {
        'Method': method_name,
        'Mean Standard Deviation': metrics['standard_deviation'].mean(),
        'Minkowski Distance': metrics['minkowski_distance'],
        'Mean Squared Error': metrics['mean_squared_error'],
    }
    for method_name, metrics in results.items()
]

comparison_df = pd.DataFrame(comparison_data)

print("\nComparative Table:")
print(comparison_df)

# Bar charts: one panel per metric, methods on the x axis
import matplotlib.pyplot as plt

plt.figure(figsize=(15, 5))

metric_columns = ['Mean Standard Deviation', 'Minkowski Distance', 'Mean Squared Error']
for panel, metric_column in enumerate(metric_columns, start=1):
    plt.subplot(1, 3, panel)
    plt.bar(comparison_df['Method'], comparison_df[metric_column])
    plt.title(metric_column)
    plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# Weight Attribution and Best Normalization Selection

As can be observed from the previous metrics, vector normalization is the method that best preserves the integrity and fidelity of the data. Here, this normalization is selected and weights are applied to the respective criteria, in order to proceed with the calculation of the matrices needed by the ELECTRE algorithm.

In [None]:
# Pick the vector-normalized dataset out of the (dataset, label) pairs
vector_normalized_data = [data for data, label in normalized_datasets if label == "Vector"][0]

# Work on a copy so the unweighted normalization stays available
normalized_data = vector_normalized_data.copy()

# Relative importance of each criterion.
# NOTE(review): these weights add up to 0.90, not 1.00 -- confirm this is intended.
weights = dict(
    ABV=0.10,
    Ratings=0.35,
    Body=0.10,
    Price=0.25,
    Acidity=0.10,
)

# Scale every criterion column by its weight
for column in normalized_data.columns:
    weighted_column = normalized_data[column] * weights[column]
    normalized_data[column] = weighted_column

# Re-evaluate the three metrics on the weighted data
standard_deviation = calculate_standard_deviation(normalized_data)
minkowski_distance = calculate_minkowski_distance(normalized_data)
mean_squared_error = calculate_mean_squared_error(normalized_data, merged_df)

print("Standard Deviation:")
print(standard_deviation)
print("\nMinkowski Distance:")
print(minkowski_distance)
print("\nMean Squared Error:")
print(mean_squared_error)
    

In [None]:
#  def compare_alternatives(normalized_df, benefit_criteria, cost_criteria, weights):
#      """
#      Compares each pair of alternatives and creates sets of superior and inferior values.
#      
#      Args:
#          normalized_df: Normalized DataFrame with criteria
#      
#      Returns:
#          superior_values_set: Dictionary with superior value sets
#          inferior_values_set: Dictionary with inferior value sets
#      """
#  
#      n_alternatives = len(normalized_df)
#      criteria = list(normalized_df.columns)
#      
#      # Arrays 3D inicializados con False
#      superior = np.zeros((n_alternatives, n_alternatives), dtype=float)
#      inferior = np.zeros((n_alternatives, n_alternatives), dtype=bool)
#      
#      for i in range(n_alternatives):
#          for j in range(n_alternatives):
#              if i != j:
#                  for k, criterion in enumerate(criteria):
#                      if normalized_df.iloc[i][criterion] > normalized_df.iloc[j][criterion] and criterion in benefit_criteria:
#                          superior[i, j] = superior[i, j] + weights[criterion]
#                      if normalized_df.iloc[i][criterion] < normalized_df.iloc[j][criterion] and criterion in cost_criteria:
#                          superior[i, j] = superior[i, j] + weights[criterion]
#                      else:
#                          inferior[i, j] = True
#                  
#      return superior, inferior

# Concordance Matrix Calculation

The concordance matrix is a fundamental part of the ELECTRE algorithm and represents, for each pair of alternatives (a,b), the degree to which alternative a is at least as good as alternative b. The calculation is performed by summing the weights of the criteria where alternative a is equal to or better than b. 

In [None]:


def calculate_concordance_matrix(normalized_df, criteria_weights):
    """
    Calculates the binary concordance (dominance) matrix.

    The concordance index C[i, j] is the sum of the weights of all criteria on
    which alternative i is at least as good as alternative j (values are
    assumed to be oriented so that higher is better). The indices are then
    compared with their average over all ordered pairs, C_BAR.

    Args:
        normalized_df: DataFrame with normalized values, one row per
            alternative and one column per criterion
        criteria_weights: Dictionary mapping every criterion (column name) to
            its weight

    Returns:
        DataFrame labelled 'A1'..'An' on both axes holding 1 where
        C[i, j] >= C_BAR and 0 elsewhere; the diagonal is always 0.
    """
    n_alternatives = len(normalized_df)
    criteria = list(normalized_df.columns)
    labels = [f'A{i+1}' for i in range(n_alternatives)]

    values = normalized_df.to_numpy(dtype=float)
    weight_vector = np.array([criteria_weights[criterion] for criterion in criteria], dtype=float)

    # Row i: compare alternative i with every alternative at once. The boolean
    # (n, m) mask marks the criteria where i is at least as good; multiplying
    # by the weights sums the weights of that concordance set.
    concordance_matrix = np.zeros((n_alternatives, n_alternatives))
    for i in range(n_alternatives):
        concordance_matrix[i] = (values[i] >= values) @ weight_vector

    # An alternative is not compared with itself
    np.fill_diagonal(concordance_matrix, 0.0)

    # C_BAR: average index over the n * (n - 1) ordered pairs (guarded so that
    # zero or one alternative does not divide by zero)
    n_pairs = n_alternatives * (n_alternatives - 1)
    c_bar = concordance_matrix.sum() / n_pairs if n_pairs else 0.0

    # Generate the binary matrix by comparing with C_BAR
    binary_matrix = (concordance_matrix >= c_bar).astype(int)
    np.fill_diagonal(binary_matrix, 0)

    return pd.DataFrame(binary_matrix, index=labels, columns=labels)

# Binary concordance matrix of the weighted, normalized alternatives
concordance_matrix = calculate_concordance_matrix(
    normalized_df=normalized_data,
    criteria_weights=weights,
)


# Discordance Matrix Calculation

The discordance matrix in ELECTRE measures the degree of disagreement or 'veto' between alternatives. Discordance measures how strong the evidence is against the claim 'a is at least as good as b'

In [None]:
def calculate_discordance_matrix(normalized_df):
    """
    Calculates the binary discordance matrix between all alternatives.

    The discordance index D[i, j] is the largest amount by which alternative i
    falls short of alternative j on any single criterion, divided by the
    largest absolute difference between the two on any criterion. It is 0 when
    i is at least as good as j everywhere. The indices are then compared with
    their average over all ordered pairs, D_BAR.

    Parameters:
    normalized_df (pandas.DataFrame): Normalized DataFrame with one row per
        alternative and one column per criterion

    Returns:
    pandas.DataFrame: Matrix labelled 'A1'..'An' on both axes holding 1 where
        D[i, j] >= D_BAR and 0 elsewhere; the diagonal is always 0.
    """
    n_alternatives = len(normalized_df)
    labels = [f'A{i+1}' for i in range(n_alternatives)]

    values = normalized_df.to_numpy(dtype=float)
    distances_df = np.zeros((n_alternatives, n_alternatives))

    if values.shape[1] > 0:
        for i in range(n_alternatives):
            # Row j holds (alternative i - alternative j) for every criterion
            differences = values[i] - values

            # Largest shortfall of i against j; clamped at 0 so that an
            # alternative that is better everywhere yields no discordance
            worst_shortfall = np.maximum(-differences.min(axis=1), 0.0)

            # Greatest absolute difference on any criterion
            largest_gap = np.abs(differences).max(axis=1)

            # Identical alternatives (gap 0) keep a discordance of 0
            comparable = largest_gap != 0
            distances_df[i, comparable] = worst_shortfall[comparable] / largest_gap[comparable]

    # D_BAR: average index over the n * (n - 1) ordered pairs (guarded so that
    # zero or one alternative does not divide by zero)
    n_pairs = n_alternatives * (n_alternatives - 1)
    d_bar = distances_df.sum() / n_pairs if n_pairs else 0.0

    # Generate the binary matrix
    binary_matrix = (distances_df >= d_bar).astype(int)
    np.fill_diagonal(binary_matrix, 0)

    return pd.DataFrame(binary_matrix, index=labels, columns=labels)
    
# Binary discordance matrix of the weighted, normalized alternatives
discordance_matrix = calculate_discordance_matrix(normalized_df=normalized_data)

# Aggregation of Concordance and Discordance Matrices

The discordance matrix is used together with the concordance matrix to determine the outranking relationships between alternatives.

In [None]:
def calculate_concordance_discordance_aggregation_matrix(concordance_matrix, discordance_matrix):
    """
    Combines the binary concordance and discordance matrices into the
    outranking (aggregated dominance) matrix.

    Alternative i outranks alternative j only when the concordance test passes
    (concordance flag is 1) and there is no discordance veto (discordance flag
    is 0). A discordance flag of 1 marks above-average evidence *against*
    "i is at least as good as j", so it must block the relation, not confirm it.

    Parameters:
    concordance_matrix (pandas.DataFrame): Binary concordance matrix
        (1 = concordance index at or above its average)
    discordance_matrix (pandas.DataFrame): Binary discordance matrix
        (1 = discordance index at or above its average)

    Returns:
    pandas.DataFrame: Binary matrix with 1 at (i, j) where i outranks j
    """
    # 1 where the discordance test raises no veto
    no_veto = (discordance_matrix == 0).astype(int)

    # Outranking requires both conditions at once
    global_concordance_matrix = concordance_matrix & no_veto

    return global_concordance_matrix

# Combine both binary matrices into the aggregated dominance matrix
aggregation_matrix = calculate_concordance_discordance_aggregation_matrix(
    concordance_matrix=concordance_matrix,
    discordance_matrix=discordance_matrix,
)


# Ranking of Alternatives

In this cell, a ranking of alternatives is created based on an aggregation matrix. If it finds a 1 in this matrix at position (i,j), it means that alternative i dominates j

- Increases the dominance counter for i
- Increases the 'times dominated' counter for j

Sorts the alternatives based on two criteria: First by the number of dominances (higher is better). In case of a tie, by the lowest number of times dominated

In [None]:
def calculate_ranking(matrix):
    """Rank the alternatives from a binary aggregation (outranking) matrix.

    A 1 at position (i, j) means alternative i dominates alternative j.
    Alternatives are ordered by the number of alternatives they dominate
    (more is better); ties are broken by how many times they are themselves
    dominated (fewer is better).

    Args:
        matrix: Square DataFrame of 0/1 values, rows and columns in the same
            alternative order.

    Returns:
        Tuple (rank_dict, dominance, dominated):
          rank_dict maps the 0-based row position of an alternative to its
          rank (1 = best), inserted in ranking order;
          dominance[i] is the number of 1s in row i;
          dominated[i] is the number of 1s in column i.
    """
    n = len(matrix)
    is_dominating = matrix.to_numpy() == 1

    # Row sums: how many alternatives each one dominates
    dominance = is_dominating.sum(axis=1).tolist() if n else []
    # Column sums: how many alternatives dominate each one
    dominated = is_dominating.sum(axis=0).tolist() if n else []

    # Create ranking based on dominance (descending) and dominated (ascending);
    # sorted() is stable, so full ties keep their original order
    ranking = sorted(range(n), key=lambda x: (-dominance[x], dominated[x]))

    # Map each alternative's 0-based position to its rank (starting at 1)
    rank_dict = {alt: pos for pos, alt in enumerate(ranking, start=1)}

    return rank_dict, dominance, dominated

# Rank the alternatives from the aggregated dominance matrix
ranking, dominance, dominated = calculate_ranking(aggregation_matrix)

# Display ranking: the dict maps each 0-based alternative index to its
# position and is ordered from best to worst
print("Ranking:")
for alt, pos in ranking.items():
    print(f"A{alt+1}: Position = {pos}")

In [None]:
# Clear rows with null values from the descriptive copy
copy_df = copy_df.dropna()

# Calculate ranking
ranking, dominance, dominated = calculate_ranking(aggregation_matrix)

# Add the ranking to copy_df. The ranking is keyed by 0-based row position in
# the criteria table, which coincides with the index labels copy_df inherited
# from merged_df. Mapping copy_df's own index keeps the assignment the same
# length as copy_df even when dropna() removed rows (mapping merged_df's index
# would then fail with a length mismatch).
copy_df['Ranking'] = copy_df.index.map(ranking.get)

# Sort the dataframe by ranking
copy_df = copy_df.sort_values('Ranking')

# Display the ranked dataframe
print(copy_df)

# Mini example

In [None]:
# Five hand-made alternatives to trace the whole ELECTRE pipeline by eye
data = {
    'ABV': [17, 5, 13, 9, 21],
    'Ratings': [3, 5, 2, 4, 1],
    'Body': [5, 2, 4, 3, 1],
    'Acidity': [1, 3, 1, 2, 3],
    'Price': [50, 30, 20, 10, 5]
}

df = pd.DataFrame(data, index=['A1', 'A2', 'A3', 'A4', 'A5'])

benefit_criteria = ['ABV', 'Ratings', 'Body', 'Acidity']
cost_criteria = ['Price']
normalized_df = normalize_dataset_with_vector(df, benefit_criteria, cost_criteria)

weights = {
    'ABV': 0.10,
    'Ratings': 0.35,
    'Body': 0.10,
    'Price': 0.25,
    'Acidity': 0.10
}

# Apply the criterion weights to the normalized values
for column in normalized_df.columns:
    normalized_df[column] = normalized_df[column] * weights[column]

concordance_matrix = calculate_concordance_matrix(normalized_df, weights)
discordance_matrix = calculate_discordance_matrix(normalized_df)
aggregation_matrix = calculate_concordance_discordance_aggregation_matrix(concordance_matrix, discordance_matrix)

print("Aggregation Matrix:")
print(aggregation_matrix)
rank_dict, dominance, dominated = calculate_ranking(aggregation_matrix)

print("Dominance:", dominance)
print("Dominated:", dominated)

print("Ranking:", rank_dict)
# rank_dict is keyed by 0-based row position, whereas this frame is indexed by
# the labels 'A1'..'A5'; looking ranks up by label would find nothing, so the
# ranks are assigned by position instead.
normalized_df['Ranking'] = [rank_dict[position] for position in range(len(normalized_df))]
normalized_df = normalized_df.sort_values('Ranking')
print(normalized_df)

In [None]:
#  normalized_df = normalized_df.drop(columns=['Ranking'])
#  superior, inferior = compare_alternatives(normalized_df, benefit_criteria, cost_criteria, weights)
#  
#  print("Superior Values Set:")
#  print(superior)
#  print("\nInferior Values Set:")
#  print(inferior)