### Load data and drop unnecessary columns

In [29]:
import pandas as pd
import re
import numpy as np

# Read the merged wine dataset and keep only the columns used for ranking.
file_path = '../../datasets/merged_wine_dataset.csv'
selected_columns = ['WineName', 'Price', 'ABV', 'Acidity']
df = pd.read_csv(file_path).loc[:, selected_columns]

### Dataset normalization

In [30]:
# Matches the first decimal number in a string: "12", "12.50" or ".99".
_NUMBER_PATTERN = re.compile(r'(?:\d+(?:\.\d*)?|\.\d+)')


def _first_number(raw, thousands_sep=''):
    """Return the first number found in ``raw`` as a float, or NaN.

    Parameters
    ----------
    raw : str, number or missing value
        Cell content such as "£1,234.50 per bottle" or "13.5%". Values that
        are already numeric are passed through as floats.
    thousands_sep : str, optional
        Character removed before searching (e.g. ',' for prices). Empty by
        default, so nothing is removed.

    Returns
    -------
    float
        The parsed number, or ``np.nan`` when the value is missing or holds
        no digits. NaN rows are removed by the ``dropna`` at the end of this
        cell.
    """
    if pd.isnull(raw):
        return np.nan
    if isinstance(raw, (int, float, np.number)):
        return float(raw)
    text = str(raw)
    if thousands_sep:
        text = text.replace(thousands_sep, '')
    match = _NUMBER_PATTERN.search(text)
    return float(match.group()) if match else np.nan


# Extract the numeric price. The previous pattern r'[£, per bottle]' was a
# character class (it deleted single characters rather than the phrase), so
# any other trailing text made float() fail.
df['Price'] = df['Price'].apply(lambda x: _first_number(x, thousands_sep=','))

# Extract ABV values as float; integer strengths such as "13%" are accepted
# as well as decimals such as "13.5%".
df['ABV'] = df['ABV'].apply(_first_number)

# Map 'Acidity' qualitative values to numerical (e.g., High = 3, Medium = 2, Low = 1)
# Labels not present in the map become NaN and are dropped below.
acidity_map = {'High': 3, 'Medium': 2, 'Low': 1}
df['Acidity'] = df['Acidity'].map(acidity_map)

# Keep only rows where every selected column has a value
df = df.dropna()

### TOPSIS Implementation

In [32]:
from sklearn.preprocessing import MinMaxScaler

# Columns that form the decision matrix, in the same order as `weights`.
criteria_columns = ['Price', 'ABV', 'Acidity']

# Direction of each criterion: 'cost' means lower is better, 'benefit' means
# higher is better. The direction is a property of the criterion and must not
# depend on how heavily the user weights it (the previous threshold-based
# inference turned Price into a 'benefit' whenever its weight was large).
# NOTE(review): treating ABV and Acidity as benefits is a modelling
# assumption -- confirm or adjust to the intended preference.
criteria_types = ['cost', 'benefit', 'benefit']


def _read_weight(prompt):
    """Prompt for one criterion weight and return it as a float.

    Raises
    ------
    ValueError
        If the input is not a number, or is negative, NaN or infinite.
    """
    value = float(input(prompt))
    if not np.isfinite(value) or value < 0:
        raise ValueError(f"Weights must be finite and non-negative, got {value}")
    return value


print("Enter weights for each criterion (total should add up to 1):")
price_weight = _read_weight("Weight for Price (e.g., 0.3): ")
abv_weight = _read_weight("Weight for ABV (e.g., 0.4): ")
acidity_weight = _read_weight("Weight for Acidity (e.g., 0.3): ")

# Normalize weights so they sum to 1 even if the user's input did not
total_weight = price_weight + abv_weight + acidity_weight
if total_weight <= 0:
    raise ValueError("At least one weight must be greater than zero.")
price_weight /= total_weight
abv_weight /= total_weight
acidity_weight /= total_weight
weights = np.array([price_weight, abv_weight, acidity_weight])

# Display the criterion types used for the ranking
print("\nCriterion types:")
print(f"Price: {criteria_types[0]}")
print(f"ABV: {criteria_types[1]}")
print(f"Acidity: {criteria_types[2]}")

# Fail early with a clear message instead of an opaque scaler error
if df.empty:
    raise ValueError("No wines left to rank after cleaning; check the dataset and the normalization step.")

# Normalize the matrix using MinMax scaling (each column rescaled to [0, 1])
topsis_matrix = df[criteria_columns].values
scaler = MinMaxScaler()
normalized_matrix = scaler.fit_transform(topsis_matrix)

# Weighted normalized decision matrix
weighted_matrix = normalized_matrix * weights

# Ideal / anti-ideal solutions: per-column best and worst values, where
# "best" is the maximum for benefit criteria and the minimum for cost criteria
is_benefit = np.array([kind == 'benefit' for kind in criteria_types])
column_max = weighted_matrix.max(axis=0)
column_min = weighted_matrix.min(axis=0)
ideal_solution = np.where(is_benefit, column_max, column_min)
anti_ideal_solution = np.where(is_benefit, column_min, column_max)

# Euclidean distances of every wine to the ideal and anti-ideal solutions
distance_to_ideal = np.sqrt(np.sum((weighted_matrix - ideal_solution) ** 2, axis=1))
distance_to_anti_ideal = np.sqrt(np.sum((weighted_matrix - anti_ideal_solution) ** 2, axis=1))

# TOPSIS score: relative closeness to the ideal solution (1 = best).
# When both distances are zero (all wines coincide on the weighted criteria)
# the ratio is undefined, so the score is set to 0 instead of producing NaN.
total_distance = distance_to_ideal + distance_to_anti_ideal
topsis_score = np.divide(
    distance_to_anti_ideal,
    total_distance,
    out=np.zeros_like(total_distance),
    where=total_distance > 0,
)

# Add TOPSIS score to the dataframe
df['User_TOPSIS_Score'] = topsis_score

# Sort results by the new TOPSIS score, best first
sorted_df = df.sort_values(by='User_TOPSIS_Score', ascending=False).reset_index(drop=True)

# Display the sorted results with wine names
print("\nTop wines based on your preferences:")
print(sorted_df[['WineName', 'Price', 'ABV', 'Acidity', 'User_TOPSIS_Score']].head())  # Display top 5 results

# Save the full ranking to a CSV file in the current working directory
sorted_df.to_csv('topsis_results_with_wine_names.csv', index=False)

Enter weights for each criterion (total should add up to 1):


KeyboardInterrupt: Interrupted by user