In [2]:
import numpy as np
import networkx as nx

In [11]:
# Show the NumPy version this kernel is running.
np.__version__

'1.26.4'

You are given an airline network represented as a directed graph, where:

* Airports are nodes.
* Connections between airports are edges, each with a capacity that represents the maximum number of flights allowed per day.

Write a Python program to:

1. Represent this airline network as a graph.
2. Find the maximum number of flights that can flow between two given airports S (source) and T (target) using the Maximum Flow algorithm.

**Problem Statement:** You are given a dataset with multiple features. Each feature has a different range, and you are tasked with analyzing how feature scaling (normalization or standardization) affects the calculation of Euclidean distance between two points.

Write a Python program to:

1. Compute the Euclidean distance between two points in the dataset before scaling.
2. Apply Min-Max Scaling to normalize the features and compute the Euclidean distance again after scaling.
3. Compare the distances to determine how scaling impacts the contribution of each feature to the overall distance.

In [8]:
# Synthetic two-feature dataset whose columns live on very different scales.
np.random.seed(47)  # fix the legacy global RNG so the draws below are repeatable

n_samples = 10000

# Draw order matters for reproducibility: x1 is sampled first, then x2.
x1 = np.random.uniform(low=1, high=15, size=n_samples)
x2 = np.random.uniform(low=1000, high=1000000, size=n_samples)

# Rows are samples, columns are (x1, x2).
X = np.vstack((x1, x2)).transpose()
X.shape

(10000, 2)

In [9]:
# Inspect the raw feature matrix; column 0 is x1 and column 1 is x2.
X

array([[2.58883861e+00, 5.20563881e+05],
       [1.46427633e+01, 8.93599385e+04],
       [1.12022849e+01, 3.45293936e+05],
       ...,
       [2.37016198e+00, 6.32427607e+05],
       [4.69456195e+00, 4.12989799e+05],
       [2.77847607e+00, 8.13662293e+05]])

In [41]:
# Compute the Euclidean distance between two points in the dataset before scaling.

def eucliden_distance(p1, p2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    p1, p2 : array_like
        Coordinates of the two points. Lists, tuples and NumPy arrays are
        accepted; the two inputs must have the same shape (or broadcast).

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Convert to float before subtracting: plain lists do not support `-`,
    # and unsigned / small integer dtypes would silently wrap around.
    a = np.asarray(p1, dtype=float)
    b = np.asarray(p2, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

# Baseline: distance between samples 0 and 3 on the raw, unscaled features.
p1 = X[0]
p2 = X[3]
eucliden_distance(p1, p2)

326414.15088285005

In [18]:
# min-max scaling (normalization)

def min_max_scaling(X):
    """Rescale every feature (column) of ``X`` linearly to the range [0, 1].

    Parameters
    ----------
    X : array_like
        Data with samples along axis 0 and features along the remaining axes.

    Returns
    -------
    numpy.ndarray
        ``(X - min) / (max - min)`` computed independently per feature.
        A feature that is constant across all samples is mapped to 0
        instead of producing NaN from a 0/0 division.
    """
    X = np.asarray(X, dtype=float)
    mn = np.min(X, axis=0)
    mx = np.max(X, axis=0)

    # A constant feature has zero range; divide by 1 there so the result is
    # 0 rather than NaN. Non-constant features are unaffected.
    span = mx - mn
    span = np.where(span == 0, 1.0, span)

    return (X - mn) / span

In [42]:
# Normalize the dataset column by column with the hand-written min-max helper.
X_normalized = min_max_scaling(X)
X_normalized

array([[0.11340026, 0.52004607],
       [0.97450455, 0.08836479],
       [0.72872479, 0.34458211],
       ...,
       [0.09777851, 0.63203363],
       [0.26382823, 0.41235291],
       [0.12694752, 0.81346894]])

In [43]:
# Euclidean distance between the same two samples (rows 0 and 3) after manual min-max normalization
p1 = X_normalized[0]
p2 = X_normalized[3]
eucliden_distance(p1, p2)

0.404265767830538

In [23]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [44]:
# Apply min-max scaling with scikit-learn's MinMaxScaler as a cross-check
# of the manual implementation (trailing underscore = sklearn result).
normalizer = MinMaxScaler()
X_normalized_ = normalizer.fit_transform(X)
X_normalized_

array([[0.11340026, 0.52004607],
       [0.97450455, 0.08836479],
       [0.72872479, 0.34458211],
       ...,
       [0.09777851, 0.63203363],
       [0.26382823, 0.41235291],
       [0.12694752, 0.81346894]])

In [45]:
# Euclidean distance between rows 0 and 3 of the scikit-learn-normalized data
p1 = X_normalized_[0]
p2 = X_normalized_[3]
eucliden_distance(p1, p2)

0.4042657678305381

In [29]:
# standarized

def standarization(X):
    """Standardize every feature (column) of ``X`` to zero mean and unit variance.

    Parameters
    ----------
    X : array_like
        Data with samples along axis 0 and features along the remaining axes.

    Returns
    -------
    numpy.ndarray
        ``(X - mean) / std`` computed independently per feature, using the
        population standard deviation (``np.std`` default, ddof=0).
        A feature that is constant across all samples is only centered
        (divided by 1) instead of producing NaN from a 0/0 division.
    """
    X = np.asarray(X, dtype=float)
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)

    # Detect constant features via max == min rather than sigma == 0:
    # rounding in the mean can leave a tiny non-zero sigma for a constant column.
    is_constant = np.max(X, axis=0) == np.min(X, axis=0)
    sigma = np.where(is_constant, 1.0, sigma)

    return (X - mu) / sigma

In [46]:
# Standardize the dataset column by column with the hand-written helper.
X_scaled = standarization(X)
X_scaled

array([[-1.33162747,  0.08242733],
       [ 1.63407262, -1.41101964],
       [ 0.78759087, -0.52460879],
       ...,
       [-1.38542981,  0.46986012],
       [-0.81354361, -0.29014854],
       [-1.28496982,  1.09755473]])

In [47]:
# Euclidean distance between rows 0 and 3 after manual standardization
p1 = X_scaled[0]
p2 = X_scaled[3]
eucliden_distance(p1, p2)

1.3964258071096145

In [48]:
# Standardize with scikit-learn's StandardScaler as a cross-check of the
# manual implementation (trailing underscore = sklearn result).
scaler = StandardScaler()
X_scaled_ = scaler.fit_transform(X)
X_scaled_

array([[-1.33162747,  0.08242733],
       [ 1.63407262, -1.41101964],
       [ 0.78759087, -0.52460879],
       ...,
       [-1.38542981,  0.46986012],
       [-0.81354361, -0.29014854],
       [-1.28496982,  1.09755473]])

In [49]:
# Euclidean distance between rows 0 and 3 of the scikit-learn-standardized data
p1 = X_scaled_[0]
p2 = X_scaled_[3]
eucliden_distance(p1, p2)

1.3964258071096145

In [50]:

def cosine_simillarity(A, B):
    """Return the cosine similarity between two vectors.

    Parameters
    ----------
    A, B : array_like
        Vectors of equal length. Lists, tuples and NumPy arrays are accepted.

    Returns
    -------
    float
        ``dot(A, B) / (||A|| * ||B||)``, a value in [-1, 1] up to rounding.
        If either vector has zero norm the angle is undefined; 0.0 is
        returned by convention instead of NaN.
    """
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)

    denom = np.linalg.norm(A) * np.linalg.norm(B)
    if denom == 0:
        # Zero vector: avoid the 0/0 division (NaN plus RuntimeWarning).
        return 0.0

    # `.T` is a no-op for 1-D vectors; it is kept so column-vector inputs
    # of shape (n, 1) keep working as before.
    return np.dot(A.T, B) / denom

In [51]:
# Cosine similarity of samples 0 and 3 on the raw, unscaled features.
vec1, vec2 = X[0], X[3]
cosine_simillarity(vec1, vec2)

0.9999999996743235

In [52]:
# Cosine similarity of the same two samples after min-max normalization.
vec1, vec2 = X_normalized[0], X_normalized[3]
cosine_simillarity(vec1, vec2)

0.6575256172122318

In [53]:
# Cosine similarity of the same two samples after standardization.
vec1, vec2 = X_scaled[0], X_scaled[3]
cosine_simillarity(vec1, vec2)

0.3825178458916634

* Can you estimate the slope and equation of the best-fit line using linear regression, and explain how the slope relates to the direction and strength of the linear relationship between the variables?

In [8]:
# Noise-free synthetic line Y = 5*X - 2.2, so the fit below should recover
# a slope of 5 and an intercept of -2.2.
# NOTE: this rebinds X, replacing the (n_samples, 2) feature matrix used in
# the scaling cells above.
X = np.random.uniform(0, 1, 100)
# Optional Gaussian noise, currently disabled: + np.random.normal(2.0, 3.0, 100)
Y = 5 * X - 2.2

# Sample means used by the closed-form least-squares estimates.
X_bar, y_bar = np.mean(X), np.mean(Y)

X_bar, y_bar

(0.48797327130595064, 0.23986635652975316)

In [9]:
# Least-squares slope: sum of co-deviations divided by the sum of squared
# X-deviations.
dx = X - X_bar
dy = Y - y_bar
m = np.sum(dx * dy) / np.sum(dx ** 2)
m

5.0

In [10]:
# Intercept: the fitted line passes through the point of means (X_bar, y_bar).
c = y_bar - m * X_bar
c

-2.2