In [1]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import matplotlib.pyplot as plt

%matplotlib qt
# Load dataset
breast = load_breast_cancer()
x = breast.data
target = breast.target

# Standardize the data (zero mean, unit variance)
x_mean = np.mean(x, axis=0)
x_std = np.std(x, axis=0)
x_scaled = (x - x_mean) / x_std


In [2]:
# Compute covariance matrix
cov_matrix = np.cov(x_scaled.T)

# Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Sort eigenvectors by decreasing eigenvalue magnitude
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvectors_sorted = eigenvectors[:, sorted_indices]
eigenvalues_sorted = eigenvalues[sorted_indices]

# Project data onto the first two principal components
pca_projection = x_scaled @ eigenvectors_sorted[:, :2]


In [4]:
# Scatter plot
plt.figure(figsize=(8, 6))
scatter = plt.scatter(pca_projection[:, 0], pca_projection[:, 1],
                      c=target, cmap='bwr', alpha=0.7, edgecolors='k')
plt.xlabel("First Principal Component")
plt.ylabel("Second Principal Component")
plt.title("Breast Cancer Dataset - PCA Projection")
plt.legend(handles=scatter.legend_elements()[0], labels=["Cancer (0)", "No Cancer (1)"])
plt.grid(True)
plt.tight_layout()
plt.show()


In [5]:
# Assume pca_projection is already computed and has shape [n_samples, 2]
# Extract the first principal component values
pc1 = pca_projection[:, 0]

# Compute mean projections for each class
mean_pc1_negative = np.mean(pc1[target == 0])  # Cancer patients
mean_pc1_positive = np.mean(pc1[target == 1])  # Non-cancer patients

print(f"Mean PC1 projection (negative, cancer): {mean_pc1_negative:.4f}")
print(f"Mean PC1 projection (positive, no cancer): {mean_pc1_positive:.4f}")


Mean PC1 projection (negative, cancer): 3.7148
Mean PC1 projection (positive, no cancer): -2.2060


In [6]:
# Assume pca_projection and target are already defined
pc1 = pca_projection[:, 0]

# Apply rule: predict 1 if PC1 < 0 (no cancer), else 0 (cancer)
predicted = np.where(pc1 < 0, 1, 0)

# Calculate accuracy
accuracy = np.mean(predicted == target)
print(f"Classifier accuracy: {accuracy:.4f}")


Classifier accuracy: 0.9156


In [7]:
# Assume pca_projection is already computed
pc1 = pca_projection[:, 0]

# Classify as positive (no cancer, label 1) if PC1 < 0
predicted = np.where(pc1 < 0, 1, 0)

# Count how many samples are classified as positive
num_positive = np.sum(predicted == 1)

print(f"Number of samples classified as positive (no cancer): {num_positive}")


Number of samples classified as positive (no cancer): 349


In [8]:
from sklearn.datasets import load_breast_cancer

# Load the dataset
breast = load_breast_cancer()
x = breast.data
target = breast.target

# Get shape
n_samples, n_features = x.shape

print(f"Number of observations (patients): {n_samples}")
print(f"Number of features per observation: {n_features}")


Number of observations (patients): 569
Number of features per observation: 30


In [16]:
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

# Load data
roi_D = np.loadtxt("D_Cubes.txt")
roi_E = np.loadtxt("E_Cubes.txt")

# Estimate parameters
mu_D, std_D = np.mean(roi_D), np.std(roi_D)
mu_E, std_E = np.mean(roi_E), np.std(roi_E)

# Search for intersection
x_range = np.linspace(min(mu_D, mu_E) - 3*max(std_D, std_E),
                      max(mu_D, mu_E) + 3*max(std_D, std_E), 1000)
pdf_D = norm.pdf(x_range, mu_D, std_D)
pdf_E = norm.pdf(x_range, mu_E, std_E)

# Find intersection point (minimum absolute difference)
diff = np.abs(pdf_D - pdf_E)
threshold_index = np.argmin(diff)
optimal_threshold = x_range[threshold_index]

# Output result
print(f"Optimal threshold between ROI D and ROI E: {optimal_threshold:.2f}")

# Optional: plot
plt.plot(x_range, pdf_D, label='ROI D')
plt.plot(x_range, pdf_E, label='ROI E')
plt.axvline(optimal_threshold, color='red', linestyle='--', label=f'Threshold = {optimal_threshold:.2f}')
plt.title("Gaussian PDFs and Optimal Threshold")
plt.xlabel("Intensity")
plt.ylabel("Probability Density")
plt.legend()
plt.grid(True)
plt.show()


Optimal threshold between ROI D and ROI E: -8.57


In [19]:
import numpy as np
import matplotlib.pyplot as plt

# Cost function
def cost(x1, x2):
    return 7 * x1**2 + x1 * x2 + 3 * x2**2

# Gradient of the cost function
def gradient(x1, x2):
    dc_dx1 = 14 * x1 + x2
    dc_dx2 = x1 + 6 * x2
    return np.array([dc_dx1, dc_dx2])

# Parameters
alpha = 0.1              # step size
iterations = 26
x_start = np.array([2.0, 2.0])  # Starting point (green dot)
x_vals = [x_start]

# Perform gradient descent
x = x_start
for _ in range(iterations):
    grad = gradient(x[0], x[1])
    x = x - alpha * grad
    x_vals.append(x)

x_vals = np.array(x_vals)

# Plot cost surface contours
x1_grid, x2_grid = np.meshgrid(np.linspace(-4, 4, 200), np.linspace(-4, 4, 200))
c_grid = cost(x1_grid, x2_grid)

plt.figure(figsize=(8, 6))
plt.contour(x1_grid, x2_grid, c_grid, levels=50, cmap='viridis')
plt.plot(x_vals[:, 0], x_vals[:, 1], 'r-o', label="Gradient Descent Path")
plt.scatter(x_start[0], x_start[1], color='green', s=100, label='Start Point')
plt.title("Gradient Descent in 2D Parameter Space")
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
plt.legend()
plt.grid(True)
plt.axis('equal')
plt.show()


In [20]:
import numpy as np

# Cost function
def cost(x1, x2):
    return 7 * x1**2 + x1 * x2 + 3 * x2**2

# Gradient of the cost function
def gradient(x1, x2):
    dc_dx1 = 14 * x1 + x2
    dc_dx2 = x1 + 6 * x2
    return np.array([dc_dx1, dc_dx2])

# Parameters
alpha = 0.1
x = np.array([2.0, 2.0])  # Start point
threshold = 2.0

# Run gradient descent and monitor cost
for i in range(1000):  # Max 1000 iterations for safety
    c = cost(x[0], x[1])
    if c < threshold:
        print(f"Cost dropped below {threshold} after {i} iterations (c = {c:.4f})")
        break
    grad = gradient(x[0], x[1])
    x = x - alpha * grad
else:
    print("Cost did not drop below threshold within 1000 iterations.")


Cost dropped below 2.0 after 2 iterations (c = 1.2716)


In [26]:


# Load training data
data = np.loadtxt("traffic_train.txt", delimiter=",")

# Split into features and classes
density = data[:, 0]
speed = data[:, 1]
weather = data[:, 2]  # not used for this plot

# Class labels based on row order
# First 140 → Class 1 (morning), Next 140 → Class 2 (afternoon)
class1_density = density[:100]
class1_speed = speed[:100]
class2_density = density[140:240]
class2_speed = speed[140:240]

# Plot
plt.figure(figsize=(8, 6))
plt.scatter(class1_density, class1_speed, c='green', label="Morning Traffic (Class 1)")
plt.scatter(class2_density, class2_speed, c='blue', label="Afternoon Traffic (Class 2)")
plt.xlabel("Density (cars)")
plt.ylabel("Speed (km/h)")
plt.title("Traffic Training Data (First 100 Samples per Class)")
plt.legend()
plt.grid(True)
plt.show()


In [28]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Load training data
train_data = np.loadtxt("traffic_train.txt", delimiter=",")
X_train = train_data[:, :2]  # density and speed
y_train = np.array([0]*140 + [1]*140)  # 0 = morning, 1 = afternoon

# Train LDA model
lda = LDA()
lda.fit(X_train, y_train)

# Load test data
test_data = np.loadtxt("traffic_test.txt", delimiter=",")
X_test = test_data[:, :2]
y_test = np.array([0]*60 + [1]*60)

# Predict class labels
y_pred = lda.predict(X_test)

# Count misclassified afternoon samples as morning
afternoon_indices = np.arange(60, 120)
afternoon_misclassified = np.sum(y_pred[afternoon_indices] == 0)

print(f"Number of afternoon samples classified as morning (traffic jams): {afternoon_misclassified}")


Number of afternoon samples classified as morning (traffic jams): 9


In [29]:
import numpy as np

# Load the training data
data = np.loadtxt("traffic_train.txt", delimiter=",")

# Extract weather column for morning samples (first 140)
weather_morning = data[:140, 2]

# Count how many mornings had rain (weather = 1)
num_rainy_mornings = np.sum(weather_morning == 1)

print(f"Number of rainy mornings in the training set: {int(num_rainy_mornings)}")


Number of rainy mornings in the training set: 70


In [31]:
import numpy as np
from skimage import io

# Load the images (they should be label masks)
fixed = io.imread("LabelsFixedImg.png")
moving = io.imread("LabelsMovingImg.png")

# Get coordinates of landmarks (labels 1 to 5)
coords_fixed = np.argwhere((fixed >= 1) & (fixed <= 5))
coords_moving = np.argwhere((moving >= 1) & (moving <= 5))

# Compute average landmark positions
avg_fixed = np.mean(coords_fixed, axis=0)
avg_moving = np.mean(coords_moving, axis=0)

# Compute Euclidean distance
distance = np.linalg.norm(avg_fixed - avg_moving)

print(f"Euclidean distance between average landmarks: {distance:.2f}")


Euclidean distance between average landmarks: 7.28


In [33]:
import numpy as np
from skimage import io
from skimage.morphology import binary_opening, disk
from skimage.measure import label

# Step 1: Load grayscale images
img_x = io.imread("x_NisslStain_9-260.81.png", as_gray=True)
img_y = io.imread("y_NisslStain_9-260.81.png", as_gray=True)

# Step 2: Threshold the images at 30
bin_x = img_x > 30
bin_y = img_y > 30

# Step 3: Apply morphological opening with disk structuring element of size 3
opened_x = binary_opening(bin_x, disk(3))
opened_y = binary_opening(bin_y, disk(3))

# Step 4: Label connected components (BLOBs)
label_x = label(opened_x)
label_y = label(opened_y)

# Step 5: Count BLOBs (ignore background = 0)
num_cells_x = np.max(label_x)
num_cells_y = np.max(label_y)

# Step 6: Output results
print(f"Number of individual cells in x image: {num_cells_x}")
print(f"Number of individual cells in y image: {num_cells_y}")


Number of individual cells in x image: 0
Number of individual cells in y image: 0


In [35]:
import pydicom
import numpy as np
from skimage import morphology, measure
from skimage.morphology import disk
import matplotlib.pyplot as plt

# Step 1: Load DICOM image
ds = pydicom.dcmread("1-189.dcm")
img = ds.pixel_array

# Step 2: Thresholding between 100 and 250
binary = (img >= 100) & (img <= 250)

# Step 3: Label connected components
label_img = measure.label(binary)

# Step 4: Filter BLOBs by area and perimeter (kidneys expected to be moderate to large)
region_props = measure.regionprops(label_img)

# Define area and perimeter thresholds (tuned empirically or based on prior info)
area_min = 300
area_max = 2000
perimeter_min = 80
perimeter_max = 300

# Create mask for valid kidney regions
kidney_mask = np.zeros_like(binary, dtype=bool)
for region in region_props:
    if area_min <= region.area <= area_max and perimeter_min <= region.perimeter <= perimeter_max:
        kidney_mask[label_img == region.label] = 1

# Step 5: Morphological closing with disk radius 3
kidney_closed = morphology.closing(kidney_mask, disk(3))

# Step 6: Compute number of foreground pixels
pixel_count = np.sum(kidney_closed)

# Step 7: Convert to physical area (0.78 mm × 0.78 mm)
pixel_area_mm2 = 0.78 * 0.78
total_area_mm2 = pixel_count * pixel_area_mm2

total_area_cm2 = total_area_mm2 / 100
print(f"Total kidney area after processing: {total_area_cm2:.2f} cm²")



Total kidney area after processing: 4.44 cm²


In [36]:
# Step 1: HU image (from DICOM)
hu_img = ds.pixel_array  # HU values

# Step 2: Apply mask to extract HU values inside the segmented kidneys
kidney_hu_values = hu_img[kidney_closed]

# Step 3: Compute median HU value
median_hu = np.median(kidney_hu_values)

print(f"Median HU value in kidney regions: {median_hu:.2f}")


Median HU value in kidney regions: 128.00


In [41]:
from skimage import io
from scipy.spatial import distance

# Load expert segmentation (binary image: 1 = kidney, 0 = background)
expert_mask = io.imread("1-189.dcm") > 0  # adjust filename as needed

# Flatten both masks to 1D
flat_expert = expert_mask.ravel()
flat_ours = kidney_closed.ravel()  # from your morphological segmentation

# Compute DICE score
dice_score = 1 - distance.dice(flat_ours, flat_expert)

print(f"DICE score between your segmentation and expert: {dice_score:.4f}")


Reading DICOM (examining files): 36/36 files (100.0%)
  Found 1 correct series.
DICE score between your segmentation and expert: 0.0214


In [42]:
from skimage.measure import regionprops, label

# Assume 'binary' is your thresholded image between 100 and 250
label_img = label(binary)

# Measure all properties
props = regionprops(label_img)

# Loop to examine areas for blobs with perimeter in [400, 600]
filtered_areas = []

for region in props:
    if 400 <= region.perimeter <= 600 and region.area <= 5000:
        filtered_areas.append(region.area)

# Sort to understand the range
filtered_areas.sort()
print("Filtered candidate areas:", filtered_areas)


Filtered candidate areas: [963.0, 1518.0, 2816.0, 3525.0]


In [45]:
import numpy as np
from sklearn.decomposition import PCA
from skimage import io
import glob

# Step 1: Load and flatten images
image_paths = sorted(glob.glob("exam_02502_E2024_data/screws/*.jpg"))  # or .jpg, depending on format
images = [io.imread(path, as_gray=True).flatten() for path in image_paths]
X = np.array(images)

# Step 2: Apply PCA
pca = PCA()
pca.fit(X)

# Step 3: Compute cumulative explained variance
cum_var = np.cumsum(pca.explained_variance_ratio_)

# Step 4: Find how many components explain at least 44%
n_components = np.argmax(cum_var >= 0.44) + 1  # +1 because indices start at 0

print(f"Number of PCA components needed to explain at least 44% variance: {n_components}")


Number of PCA components needed to explain at least 44% variance: 3


In [47]:
import numpy as np
from sklearn.decomposition import PCA
from skimage import io
import matplotlib.pyplot as plt
import glob

# Load images and flatten them
image_paths = sorted(glob.glob("exam_02502_E2024_data/screws/*.jpg"))  # adjust the path and extension
images = [io.imread(path, as_gray=True).flatten() for path in image_paths]
X = np.array(images)

# Apply PCA
pca = PCA()
X_pca = pca.fit_transform(X)  # shape: [n_images, n_components]

# Get PC1 values (first column)
pc1_values = X_pca[:, 0]

# Find indices of min and max PC1 projection
min_index = np.argmin(pc1_values)
max_index = np.argmax(pc1_values)

# Load corresponding original images
img_min = io.imread(image_paths[min_index])
img_max = io.imread(image_paths[max_index])

# Plot the two images
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.imshow(img_min, cmap='gray')
plt.title(f"Min PC1 (Index {min_index})")

plt.subplot(1, 2, 2)
plt.imshow(img_max, cmap='gray')
plt.title(f"Max PC1 (Index {max_index})")

plt.suptitle("Images with Extreme PC1 Values")
plt.axis('off')
plt.tight_layout()
plt.show()


In [48]:
import numpy as np
from sklearn.decomposition import PCA
from skimage import io
import matplotlib.pyplot as plt
import glob
import os

# Load images and their filenames
image_paths = sorted(glob.glob("exam_02502_E2024_data/screws/*.jpg"))  # or .png depending on format
filenames = [os.path.basename(p) for p in image_paths]
images = [io.imread(p, as_gray=True).flatten() for p in image_paths]
X = np.array(images)

# Perform PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Find index of screws_007.jpg
index_007 = filenames.index("screws_007.jpg")

# Plot all PCA projections and highlight screws_007.jpg
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c='gray', label="Other screws")
plt.scatter(X_pca[index_007, 0], X_pca[index_007, 1], c='red', label="screws_007.jpg", edgecolors='black', s=100)
plt.title("Screw projections on PCA space (PC1 vs PC2)")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend()
plt.grid(True)
plt.axis('equal')
plt.show()


In [49]:
import numpy as np
from sklearn.decomposition import PCA
from skimage import io
import glob
import os
from scipy.spatial.distance import pdist, squareform

# Load and flatten images
image_paths = sorted(glob.glob("exam_02502_E2024_data/screws/*.jpg"))  # or .png depending on format
filenames = [os.path.basename(p) for p in image_paths]
images = [io.imread(p, as_gray=True).flatten() for p in image_paths]
X = np.array(images)

# Project into PCA space
pca = PCA(n_components=7)  # or use 2 for visualization only
X_pca = pca.fit_transform(X)

# Compute pairwise distances
dist_matrix = squareform(pdist(X_pca))

# Set diagonal to infinity to ignore self-comparison
np.fill_diagonal(dist_matrix, np.inf)

# Find indices of the minimum distance
i, j = np.unravel_index(np.argmin(dist_matrix), dist_matrix.shape)

# Output the result
print(f"The two most similar photos in PCA space are:")
print(f"{filenames[i]} and {filenames[j]}")
print(f"Distance: {dist_matrix[i, j]:.4f}")


The two most similar photos in PCA space are:
screws_012.jpg and screws_016.jpg
Distance: 3.3881


In [50]:
import numpy as np
from sklearn.decomposition import PCA
from skimage import io
import glob
import os
from scipy.spatial.distance import euclidean

# Load and flatten images
image_paths = sorted(glob.glob("exam_02502_E2024_data/screws/*.jpg"))  # or .png depending on your data
filenames = [os.path.basename(p) for p in image_paths]
images = [io.imread(p, as_gray=True).flatten() for p in image_paths]
X = np.array(images)

# Step 1: Apply PCA using all components
n_components = X.shape[0]  # 20 images → up to 20 components
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X)

# Step 2: Find indices of screws_007.jpg and screws_008.jpg
index_007 = filenames.index("screws_007.jpg")
index_008 = filenames.index("screws_008.jpg")

# Step 3: Compute Euclidean distance in full PCA space
distance = euclidean(X_pca[index_007], X_pca[index_008])
print(f"Distance between screws_007 and screws_008 in PCA space: {distance:.4f}")


Distance between screws_007 and screws_008 in PCA space: 47.3644
