# Supremum Distance 
also known as the Chebyshev Distance. This metric measures the maximum absolute difference between the coordinates of two points. It is particularly useful in scenarios where the most significant difference between any single dimension matters the most.

### Importing required Libraries

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.spatial.distance import chebyshev

### Load Datasets

In [5]:
# Load datasets
adult_df = pd.read_csv("../adult/adult_trim.data", header=None) # No header
titanic_df = pd.read_csv('../titanic/titanic_trim.csv') # Has header

# Rename columns for clarity
adult_df.columns = ["age", "workclass", "fnlwgt", "education", "education_num", 
                    "marital_status", "occupation", "relationship", "race", "sex", 
                    "capital_gain", "capital_loss", "hours_per_week", "native_country", "income"]
adult_df.dropna(inplace=True)

In [6]:
adult_df

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,29,Local-gov,115585,Some-college,10,Never-married,Handlers-cleaners,Not-in-family,White,Male,0,0,50,United-States,<=50K
96,48,Self-emp-not-inc,191277,Doctorate,16,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,1902,60,United-States,>50K
97,37,Private,202683,Some-college,10,Married-civ-spouse,Sales,Husband,White,Male,0,0,48,United-States,>50K
98,48,Private,171095,Assoc-acdm,12,Divorced,Exec-managerial,Unmarried,White,Female,0,0,40,England,<=50K


In [7]:
titanic_df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
150,151,0,2,"Bateman, Rev. Robert James",male,51.0,0,0,S.O.P. 1166,12.5250,,S
151,152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22.0,1,0,113776,66.6000,C2,S
152,153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.0500,,S
153,154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5000,,S


### Select relevant columns from Adult dataset (mix of nominal and ratio-scaled)

In [8]:
adult_df = adult_df[["age", "workclass", "education", "education_num", "sex"]]

adult_df

Unnamed: 0,age,workclass,education,education_num,sex
0,39,State-gov,Bachelors,13,Male
1,50,Self-emp-not-inc,Bachelors,13,Male
2,38,Private,HS-grad,9,Male
3,53,Private,11th,7,Male
4,28,Private,Bachelors,13,Female
...,...,...,...,...,...
95,29,Local-gov,Some-college,10,Male
96,48,Self-emp-not-inc,Doctorate,16,Male
97,37,Private,Some-college,10,Male
98,48,Private,Assoc-acdm,12,Female


### Encode nominal attributes as integers for processing

In [9]:
label_encoders = {}
for column in adult_df.columns:
    if adult_df[column].dtype == object:
        le = LabelEncoder()
        adult_df[column] = le.fit_transform(adult_df[column])
        label_encoders[column] = le

adult_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adult_df[column] = le.fit_transform(adult_df[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adult_df[column] = le.fit_transform(adult_df[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adult_df[column] = le.fit_transform(adult_df[column])


Unnamed: 0,age,workclass,education,education_num,sex
0,39,6,7,13,1
1,50,5,7,13,1
2,38,3,9,9,1
3,53,3,1,7,1
4,28,3,7,13,0
...,...,...,...,...,...
95,29,2,12,10,1
96,48,5,8,16,1
97,37,3,12,10,1
98,48,3,5,12,0


### Clean and preprocess Titanic dataset

In [10]:
titanic_df.dropna(inplace=True)
titanic_df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S
21,22,1,2,"Beesley, Mr. Lawrence",male,34.0,0,0,248698,13.0,D56,S
23,24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5,A6,S
27,28,0,1,"Fortune, Mr. Charles Alexander",male,19.0,3,2,19950,263.0,C23 C25 C27,S
52,53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49.0,1,0,PC 17572,76.7292,D33,C
54,55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C


### Select relevant columns from Titanic dataset (mix of nominal and ratio-scaled)

In [11]:
titanic_df = titanic_df[["Age", "Sex", "Pclass", "Fare", "Embarked"]]
titanic_df

Unnamed: 0,Age,Sex,Pclass,Fare,Embarked
1,38.0,female,1,71.2833,C
3,35.0,female,1,53.1,S
6,54.0,male,1,51.8625,S
10,4.0,female,3,16.7,S
11,58.0,female,1,26.55,S
21,34.0,male,2,13.0,S
23,28.0,male,1,35.5,S
27,19.0,male,1,263.0,S
52,49.0,female,1,76.7292,C
54,65.0,male,1,61.9792,C


### Encode Nominal as Integers for processing

In [12]:
label_encoders_titanic = {}
for column in titanic_df.columns:
    if titanic_df[column].dtype == object:
        le = LabelEncoder()
        titanic_df[column] = le.fit_transform(titanic_df[column])
        label_encoders[column] = le

titanic_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic_df[column] = le.fit_transform(titanic_df[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic_df[column] = le.fit_transform(titanic_df[column])


Unnamed: 0,Age,Sex,Pclass,Fare,Embarked
1,38.0,0,1,71.2833,0
3,35.0,0,1,53.1,1
6,54.0,1,1,51.8625,1
10,4.0,0,3,16.7,1
11,58.0,0,1,26.55,1
21,34.0,1,2,13.0,1
23,28.0,1,1,35.5,1
27,19.0,1,1,263.0,1
52,49.0,0,1,76.7292,0
54,65.0,1,1,61.9792,0


### Combine the datasets into a list for further processing

In [13]:

# Combine the datasets into a list for further processing
datasets = {
    "Adult Dataset": adult_df,
    "Titanic Dataset": titanic_df
}

### Compute Supremum Distance

In [14]:
def supremum_distance(a, b):
    """Calculate the Supremum (Chebyshev) Distance between two vectors."""
    try:
        return chebyshev(a, b)
    except Exception as e:
        return np.nan

# Function to create the Supremum Distance matrix
def calculate_supremum_matrix(dataset):
    n = len(dataset)
    supremum_matrix = np.zeros((n, n))
    
    for i in range(n):
        for j in range(n):
            supremum_matrix[i, j] = supremum_distance(dataset.iloc[i].values, dataset.iloc[j].values)
    
    return pd.DataFrame(supremum_matrix)

### Calculate Supremum Distance

#### For Adult Dataset

In [15]:
supremum_matrix_adult = calculate_supremum_matrix(adult_df)
supremum_matrix_adult

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.0,11.0,4.0,14.0,11.0,3.0,10.0,13.0,8.0,3.0,...,18.0,5.0,11.0,9.0,5.0,10.0,9.0,5.0,9.0,7.0
1,11.0,0.0,12.0,6.0,22.0,13.0,8.0,4.0,19.0,8.0,...,7.0,13.0,22.0,20.0,16.0,21.0,3.0,13.0,2.0,18.0
2,4.0,12.0,0.0,15.0,10.0,5.0,11.0,14.0,7.0,4.0,...,19.0,3.0,10.0,8.0,4.0,9.0,10.0,3.0,10.0,6.0
3,14.0,6.0,15.0,0.0,25.0,16.0,4.0,8.0,22.0,11.0,...,5.0,16.0,25.0,23.0,19.0,24.0,9.0,16.0,5.0,21.0
4,11.0,22.0,10.0,25.0,0.0,9.0,21.0,24.0,3.0,14.0,...,29.0,9.0,5.0,4.0,6.0,5.0,20.0,9.0,20.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,10.0,21.0,9.0,24.0,5.0,8.0,20.0,23.0,4.0,13.0,...,28.0,8.0,1.0,3.0,5.0,0.0,19.0,8.0,19.0,3.0
96,9.0,3.0,10.0,9.0,20.0,11.0,11.0,7.0,17.0,6.0,...,9.0,11.0,20.0,18.0,14.0,19.0,0.0,11.0,4.0,16.0
97,5.0,13.0,3.0,16.0,9.0,4.0,12.0,15.0,6.0,5.0,...,20.0,1.0,9.0,7.0,5.0,8.0,11.0,0.0,11.0,5.0
98,9.0,2.0,10.0,5.0,20.0,11.0,7.0,4.0,17.0,6.0,...,9.0,11.0,20.0,18.0,14.0,19.0,4.0,11.0,0.0,16.0


#### For Titanic Dataset

In [16]:
supremum_matrix_titanic = calculate_supremum_matrix(titanic_df)
supremum_matrix_titanic

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,0.0,18.1833,19.4208,54.5833,44.7333,58.2833,35.7833,191.7167,11.0,27.0,...,17.0,19.2833,176.2375,58.2833,16.0,45.0,18.1833,14.0,45.2833,16.0
1,18.1833,0.0,19.0,36.4,26.55,40.1,17.6,209.9,23.6292,30.0,...,24.1875,12.0,194.4208,40.1,24.1875,26.8167,2.0,26.1,27.1,13.5
2,19.4208,19.0,0.0,50.0,25.3125,38.8625,26.0,211.1375,24.8667,11.0,...,33.0,7.0,195.6583,38.8625,25.425,35.0,17.0,30.0,25.8625,32.0
3,54.5833,36.4,50.0,0.0,54.0,30.0,24.0,246.3,60.0292,61.0,...,60.5875,43.0,230.8208,28.5,60.5875,15.0,36.4,62.5,32.5,49.9
4,44.7333,26.55,25.3125,54.0,0.0,24.0,30.0,236.45,50.1792,35.4292,...,50.7375,25.45,220.9708,25.5,50.7375,39.0,26.55,52.65,21.5,40.05
5,58.2833,40.1,38.8625,30.0,24.0,0.0,22.5,250.0,63.7292,48.9792,...,64.2875,39.0,234.5208,1.5,64.2875,15.0,40.1,66.2,13.0,53.6
6,35.7833,17.6,26.0,24.0,30.0,22.5,0.0,227.5,41.2292,37.0,...,41.7875,19.0,212.0208,22.5,41.7875,9.2167,17.6,43.7,9.5,31.1
7,191.7167,209.9,211.1375,246.3,236.45,250.0,227.5,0.0,186.2708,201.0208,...,185.7125,211.0,15.4792,250.0,185.7125,236.7167,209.9,183.8,237.0,196.4
8,11.0,23.6292,24.8667,60.0292,50.1792,63.7292,41.2292,186.2708,0.0,16.0,...,28.0,24.7292,170.7916,63.7292,5.0,50.4459,23.6292,25.0,50.7292,27.0
9,27.0,30.0,11.0,61.0,35.4292,48.9792,37.0,201.0208,16.0,0.0,...,44.0,18.0,185.5416,48.9792,15.3083,46.0,28.0,41.0,35.9792,43.0


###  Explanation
Supremum Distance Calculation: This metric considers the largest difference between any single dimension of two data points. It can be thought of as the distance you would move in a chessboard when the king moves, as it captures the maximum deviation across all dimensions.

Handling Different Data Types: Supremum Distance works well with interval and ratio-scaled data, and can be applied to ordinal data as well. It's particularly useful when the most significant difference in any single attribute is of primary concern.

### Observation and Analysis
The resulting matrices will show the pairwise Supremum distances between data points. A smaller value indicates that the largest difference between any dimension of the data points is small, while a larger value indicates a significant difference in at least one dimension.

Supremum Distance is useful in scenarios where outliers or extreme differences in one dimension are more significant than cumulative differences across all dimensions.