# Advanced NumPy

In [1]:
import numpy as np

# Feature matrix: one row per house, columns are
# [bedrooms, size in sq ft, age in years].
houses = np.array(
    [
        (3, 2000, 10),  # House 1
        (2, 1500, 20),  # House 2
        (4, 2500, 5),   # House 3
    ]
)

## Broadcasting

In [None]:
# The scalar 1 is broadcast across the bedrooms column (column 0),
# so every house gains one bedroom.
updated_bedrooms = np.add(houses[:, 0], 1)
print("Updated bedrooms after broadcasting:\n", updated_bedrooms)

In [None]:
# The scalar 5 is broadcast across the age column (column 2),
# making every house five years younger.
updated_age = np.subtract(houses[:, 2], 5)
print("Updated house ages after broadcasting:\n", updated_age)

In [None]:
# Broadcasting a float scalar: scale the size column (column 1) by 1.1.
# The integer column is promoted to float in the result.
adjusted_size = np.multiply(houses[:, 1], 1.1)
print("Adjusted house sizes (sq ft) by 10% increase:\n", adjusted_size)

## Reshaping

In [None]:
# (rows, columns) of the feature matrix, i.e. (number of houses, number of features)
print(np.shape(houses))

In [None]:
# Flatten the 3x3 matrix into a 1-D array; -1 lets NumPy infer the length
reshaped_houses = np.reshape(houses, -1)
print("Reshaped houses (flattened):\n", reshaped_houses)

In [None]:
# Same nine values, but kept 2-D: a single row with nine columns
reshaped_houses_1x9 = np.reshape(houses, (1, 9))
print("Reshaped houses into 1x9:\n", reshaped_houses_1x9)

## Joining arrays

In [None]:
# A fourth house to append. It is kept 2-D (shape (1, 3)) so its columns
# line up with those of `houses`.
# House 4: 5 Bedrooms, 3000 sq ft, 15 years old
more_houses = np.array([[5, 3000, 15]])

# axis=0 joins along rows, so the result has one extra row
joined_houses = np.concatenate([houses, more_houses], axis=0)
print("Joined houses (by rows):\n", joined_houses)

In [None]:
# One price per house, shaped as a (3, 1) column so it can sit next to `houses`
extra_data = np.array([100000, 120000, 140000]).reshape(-1, 1)

# axis=1 joins along columns, so the result has one extra column
joined_columns = np.concatenate([houses, extra_data], axis=1)
print("Joined houses with extra column (price):\n", joined_columns)

## Stacking arrays

In [None]:
# vstack places the arrays on top of each other (row-wise)
stacked_houses_vertically = np.vstack([houses, more_houses])
print("Vertically stacked houses:\n", stacked_houses_vertically)

In [None]:
# hstack places the arrays side by side (column-wise)
stacked_houses_horizontally = np.hstack([houses, extra_data])
print("Horizontally stacked houses with extra column:\n", stacked_houses_horizontally)

### Checking data type

In [None]:
# Check the data type of the array: `dtype` is the single element type
# shared by every value stored in `houses`.
data_type = houses.dtype
print("Data type of the houses array:", data_type)

## Linear algebra

### Matrix multiplication

In [13]:
# Feature matrix: one row per house, columns are
# [bedrooms, size in sq ft, age in years].
houses = np.array(
    [
        (3, 2000, 10),  # House 1
        (2, 1500, 20),  # House 2
        (4, 2500, 5),   # House 3
    ]
)

# Learned model weights, one per feature column: [Bedrooms, Size, Age]
weights = np.array((50, 100, -20))

# Intercept added to every prediction
bias = 30_000



In [None]:
# One matrix-vector product scores every house at once:
# (3 houses x 3 features) @ (3 weights) -> 3 predictions, then add the intercept
predicted_prices = houses @ weights + bias
print("Predicted prices:", predicted_prices)


### Normalisation calculation

In [None]:
# L2-normalise each house: axis=1 takes the Euclidean norm of every ROW, and
# keepdims=True keeps the result as a (3, 1) column so it broadcasts over the row.
# NOTE(review): this rescales each sample to unit length; it is not per-feature
# (column-wise) scaling, and `weights` were defined for the raw features.
house_norms = np.linalg.norm(houses, axis=1, keepdims=True)
normalized_houses = np.divide(houses, house_norms)

# Apply the same linear model to the row-normalised features
predicted_prices_normalized = normalized_houses @ weights + bias
print("Predicted prices (normalized):", predicted_prices_normalized)


### Singular value decomposition

In [None]:
# Perform SVD on the feature matrix: houses = U @ diag(S) @ Vt.
# full_matrices=False requests the reduced factorisation; S comes back as a
# 1-D array of singular values rather than a diagonal matrix.
U, S, Vt = np.linalg.svd(houses, full_matrices=False)
print("U matrix:", U)

In [None]:
# S is the 1-D array of singular values returned by np.linalg.svd in the previous cell
print("Singular values:", S)

In [None]:
# Vt is the third factor returned by np.linalg.svd (V already transposed)
print("V transpose matrix:", Vt)

Here, $U$ holds the left singular vectors (an orthonormal basis in which each house is expressed), $S$ contains the singular values, and $V^T$ (`Vt` in the code) holds the right singular vectors, which capture the relationships between the original features.

### Covariance matrix

In [None]:
# Covariance between features. np.cov treats rows as variables by default,
# so the (houses x features) matrix is transposed to put one feature per row.
cov_matrix = np.cov(houses.T, rowvar=True)
print("Covariance matrix:\n", cov_matrix)

#### Visualising the covariance matrix

In [None]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt


# Define the data array with features for multiple houses: [Bedrooms, Size (sq ft), Age (years)]
houses = np.array([
    [3, 2000, 10],  # House 1: 3 Bedrooms, 2000 sq ft, 10 years old
    [2, 1500, 20],  # House 2: 2 Bedrooms, 1500 sq ft, 20 years old
    [4, 2500, 5],   # House 3: 4 Bedrooms, 2500 sq ft, 5 years old
])

# Standardize each feature (column): subtract its mean and divide by its
# SAMPLE standard deviation. ddof=1 matches the (N - 1) normalisation that
# np.cov uses by default; with np.std's default ddof=0 the covariance of the
# "standardized" data would have N / (N - 1) = 1.5 on its diagonal instead of 1.
houses_standardized = (houses - np.mean(houses, axis=0)) / np.std(houses, axis=0, ddof=1)

# Compute covariance matrix of the standardized features.
# With consistent normalisation this equals the Pearson correlation matrix.
cov_matrix_standardized = np.cov(houses_standardized, rowvar=False)

# Tick labels, in the same order as the columns of the feature matrix
feature_names = ['Bedrooms', 'Size (sq ft)', 'Age (years)']

# Create the figure and its single axes explicitly
fig, ax = plt.subplots(figsize=(6, 6))

# Annotated heatmap of the covariance matrix, drawn on that axes
sns.heatmap(
    cov_matrix_standardized,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    square=True,
    cbar_kws={"shrink": .8},
    xticklabels=feature_names,
    yticklabels=feature_names,
    ax=ax,
)

# Title and axis labels
ax.set_title("Covariance Matrix Heatmap with Standardized Features")
ax.set_xlabel("Features")
ax.set_ylabel("Features")

# Render the figure
plt.show()


### Correlation matrix

In [None]:
# Pearson correlation between features. np.corrcoef treats rows as variables
# by default, so the matrix is transposed to put one feature per row.
corr_matrix = np.corrcoef(houses.T)
print("Pearson correlation matrix:\n", corr_matrix)

Why apply this: The correlation matrix shows how strongly each pair of features is linearly related, on a scale from -1 to 1 that does not depend on the features' units. For example, you could check if there's a strong positive or negative relationship between Bedrooms and Size. If two features are highly correlated, one might be redundant.

#### Visualising the Pearson's Correlation Coefficient Matrix

In [22]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def plot_correlation_matrix(data, feature_names):
    """
    Compute and plot the Pearson correlation matrix for the given dataset.

    Parameters:
    - data: array-like of shape (n_samples, n_features); each column is one feature
    - feature_names: List of feature names corresponding to the columns in data

    Returns:
    - None (displays the heatmap of the Pearson correlation matrix)

    Raises:
    - ValueError: if data is not 2-D, or if the number of feature names does
      not match the number of columns in data
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            f"data must be 2-D with shape (n_samples, n_features), got {data.ndim}-D input"
        )
    n_features = data.shape[1]
    if len(feature_names) != n_features:
        raise ValueError(
            f"expected {n_features} feature names (one per column), got {len(feature_names)}"
        )

    # Pearson correlation is unchanged by shifting or rescaling a column, so the
    # raw data is used directly; standardizing it first would be redundant.
    # atleast_2d keeps the single-feature case (a 0-d result) plottable.
    corr_matrix = np.atleast_2d(np.corrcoef(data, rowvar=False))

    plt.figure(figsize=(6, 6))
    sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm',
                square=True, cbar_kws={"shrink": .8},
                xticklabels=feature_names, yticklabels=feature_names)
    plt.title("Pearson Correlation Matrix Heatmap")
    plt.xlabel("Features")
    plt.ylabel("Features")
    plt.show()




In [None]:
# Labels for the heatmap axes, in the same order as the columns of `houses`
feature_names = ['Bedrooms', 'Size (sq ft)', 'Age (years)']

# Draw the Pearson correlation heatmap for the houses data
plot_correlation_matrix(data=houses, feature_names=feature_names)

### QR Decomposition

You could apply QR decomposition to decompose the houses matrix into an orthogonal matrix Q and an upper triangular matrix R.

Why apply this: QR decomposition is useful when solving systems of linear equations or when performing least-squares fitting (which is at the core of linear regression).

How to apply: Decompose the houses matrix using QR decomposition.

In [None]:
# Factor the feature matrix as houses = Q @ R, with Q orthogonal and
# R upper triangular (as described in the text above).
Q, R = np.linalg.qr(houses)
print("Q matrix:", Q)

In [None]:
# R is the upper-triangular factor returned by np.linalg.qr in the previous cell
print("R matrix:", R)

### Eigenvalues and eigenvectors

You could compute the eigenvalues and eigenvectors of the covariance matrix to analyze the principal components of the features.

Why apply this: Eigenvalues and eigenvectors help in understanding the direction of maximum variance in the data, which is the foundation of Principal Component Analysis (PCA). This is useful for feature reduction.

How to apply this: Compute the eigenvalues and eigenvectors of the covariance matrix.

In [None]:
# A covariance matrix is symmetric, so use eigh rather than the general eig:
# it returns real eigenvalues in ascending order, with orthonormal
# eigenvectors stored as columns (eigenvectors[:, i] pairs with eigenvalues[i]).
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
print("Eigenvalues:", eigenvalues)


In [None]:
# Each column of `eigenvectors` is one eigenvector; column i pairs with eigenvalues[i]
print("Eigenvectors:\n", eigenvectors)

#### Visualising the Orthogonal matrix for Principal Components

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Example 2x2 covariance matrix (could be derived from your dataset).
# The demo uses its own variable names so it does not overwrite `cov_matrix`,
# `eigenvalues` or `eigenvectors` computed from the houses data; re-running
# earlier cells after this one therefore still gives the houses results.
example_cov_matrix = np.array([[4, 2],
                               [2, 3]])

# A covariance matrix is symmetric, so eigh applies: real eigenvalues in
# ascending order, orthonormal eigenvectors as columns.
example_eigenvalues, example_eigenvectors = np.linalg.eigh(example_cov_matrix)

# Put the largest-variance direction first so it is always the red arrow
example_eigenvalues = example_eigenvalues[::-1]
example_eigenvectors = example_eigenvectors[:, ::-1]

# Plotting the eigenvectors
origin = np.array([0, 0])  # Origin point (0,0)
fig, ax = plt.subplots()

# Plot the eigenvectors scaled by their corresponding eigenvalues.
# Transposing iterates over columns, i.e. one eigenvector per step.
for eigenvalue, eigenvector, colour in zip(example_eigenvalues, example_eigenvectors.T, ['r', 'b']):
    ax.arrow(origin[0], origin[1], eigenvector[0] * eigenvalue, eigenvector[1] * eigenvalue,
             head_width=0.2, head_length=0.3, fc=colour, ec=colour)

# Set up the plot to have equal scaling
ax.set_xlim([-5, 5])
ax.set_ylim([-5, 5])
ax.axhline(0, color='black', linewidth=0.5)
ax.axvline(0, color='black', linewidth=0.5)
ax.set_aspect('equal')

plt.title("Eigenvalues and Eigenvectors (Orthogonal Directions)")
plt.grid()
plt.show()
