In [16]:
import pandas as pd
import numpy as np
import math
from scipy.spatial.distance import cdist, pdist, squareform

In [None]:
# Load birds2025.csv into a DataFrame; fields in the file are separated by ';'.
df = pd.read_csv("birds2025.csv", sep=';')

def minmax_scaling(df, features=['length', 'wspan', 'weight', 'AR', 'wload']):
    """
    Returns a copy of df in which every column listed in `features` is
    rescaled with min-max scaling: (x - min) / (max - min).

    The input frame is left untouched; columns not listed in `features`
    are carried over unchanged. A column whose values are all equal has
    a zero range, so its scaled values come out as NaN.
    """
    result = df.copy()
    for name in features:
        series = result[name]
        lowest = series.min()
        span = series.max() - lowest
        # Shift so the minimum lands on 0, then divide by the range.
        result[name] = series.sub(lowest).div(span)
    return result
                                       
def mean_scaling(df, features=['length', 'wspan', 'weight', 'AR', 'wload']):
    """
    Returns a copy of df in which every column listed in `features` is
    mean-normalised: (x - mean) / (max - min).

    Each scaled column has mean 0 and a total spread (max - min) of 1.
    The input frame is left untouched; columns not listed in `features`
    are carried over unchanged. A column whose values are all equal has
    a zero range, so its scaled values come out as NaN.

    Note: compared with min-max scaling this only shifts each column, so
    pairwise distances between rows are the same under both scalings.
    """
    scaled_df = df.copy()
    for feature in features:
        col = scaled_df[feature]
        # Centre on the mean, not the minimum: subtracting the minimum
        # would make this function identical to min-max scaling.
        scaled_df[feature] = (col - col.mean()) / (col.max() - col.min())
    return scaled_df

def midpoint(value):
    """
    Converts a cell value to a float, resolving "a-b" ranges to their midpoint.

    - A value that parses as a number (including negative numbers such as
      "-5" and scientific notation such as "1e-3") is returned as a float.
    - A string of the form "a-b", where both parts parse as numbers, is
      returned as (a + b) / 2.
    - Anything else (non-numeric text, None, malformed ranges such as
      "1-2-3") is returned unchanged.
    """
    # Try a plain number first: "-5" and "1e-3" contain '-' but are not
    # ranges, and must not fall into the range-splitting branch below.
    try:
        return float(value)
    except (TypeError, ValueError):
        # TypeError covers values float() cannot accept at all (e.g. None).
        if not (isinstance(value, str) and '-' in value):
            return value

    try:
        # Unpacking raises ValueError when there are not exactly two parts,
        # and float() raises ValueError when a part is not numeric.
        low, high = map(float, value.split('-'))
    except ValueError:
        return value
    return (low + high) / 2

def midpoint_column(df, features=['length', 'wspan', 'weight']):
    """
    Returns a copy of df in which every column listed in `features` has
    had `midpoint` applied to each of its values.

    The input frame is left untouched; columns not listed in `features`
    are carried over unchanged.
    """
    converted = df.copy()
    for name in features:
        values = converted[name]
        converted[name] = values.apply(midpoint)
    return converted

def add_bmi(df):
    """
    Returns a copy of df with an extra column 'bmi' = weight / length^2.

    The input frame is left untouched.
    """
    length_squared = df["length"] ** 2
    return df.assign(bmi=df["weight"] / length_squared)


def add_wsi(df):
    """
    Returns a copy of df with an extra column 'wsi' = wspan / length.

    The input frame is left untouched.
    """
    span_to_length = df["wspan"] / df["length"]
    return df.assign(wsi=span_to_length)

def pwnum_dists(df, features=['length', 'wspan', 'weight', 'AR', 'wload']):
    """
    Returns the square matrix of pairwise Euclidean distances between the
    rows of df, measured over the columns listed in `features`.

    Entry [i, j] is the distance between row i and row j; the matrix is
    symmetric with zeros on the diagonal.
    """
    points = df[features].to_numpy()
    # pdist yields the condensed (upper-triangle) form; squareform expands
    # it into the full n x n matrix.
    condensed = pdist(points, metric='euclidean')
    return squareform(condensed)

    
# Resolve "a-b" range strings to numeric midpoints before any scaling.
df = midpoint_column(df)

# Two scaled copies of the cleaned data, one per scaling function.
df_minmax = minmax_scaling(df)
df_meanscaled = mean_scaling(df)

# Pairwise distance matrix for each scaled copy, printed one after the other.
dists1 = pwnum_dists(df_meanscaled)
dists2 = pwnum_dists(df_minmax)
print(dists1, dists2, sep="\n")

[[0.         0.33128127 0.40904919 ... 0.50435464 0.35871061 1.09446709]
 [0.33128127 0.         0.08344737 ... 0.57578916 0.28049308 0.89153719]
 [0.40904919 0.08344737 0.         ... 0.60943039 0.29890092 0.83936217]
 ...
 [0.50435464 0.57578916 0.60943039 ... 0.         0.34109051 0.8233108 ]
 [0.35871061 0.28049308 0.29890092 ... 0.34109051 0.         0.77136425]
 [1.09446709 0.89153719 0.83936217 ... 0.8233108  0.77136425 0.        ]]
[[0.         0.33128127 0.40904919 ... 0.50435464 0.35871061 1.09446709]
 [0.33128127 0.         0.08344737 ... 0.57578916 0.28049308 0.89153719]
 [0.40904919 0.08344737 0.         ... 0.60943039 0.29890092 0.83936217]
 ...
 [0.50435464 0.57578916 0.60943039 ... 0.         0.34109051 0.8233108 ]
 [0.35871061 0.28049308 0.29890092 ... 0.34109051 0.         0.77136425]
 [1.09446709 0.89153719 0.83936217 ... 0.8233108  0.77136425 0.        ]]
