In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

def load_data(file_path, sheet_name):
    """Load data from an Excel sheet and return a DataFrame."""
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
        return df
    except FileNotFoundError:
        raise FileNotFoundError(f"Error: The file at path '{file_path}' was not found.")
    except ValueError as ve:
        raise ValueError(f"Value Error: {ve}")
    except Exception as e:
        raise Exception(f"An error occurred: {e}")

def dimensions(A):
    """Return the dimensions of matrix A."""
    return A.shape

def count_vectors(A):
    """Return the number of vectors in matrix A."""
    return A.shape[0]

def rank_of_matrix(A):
    """Return the rank of matrix A."""
    return np.linalg.matrix_rank(A)

def compute_costs(A, C):
    """Compute the pseudo-inverse of A and find the costs."""
    A_pseudo_inv = np.linalg.pinv(A)
    X = np.dot(A_pseudo_inv, C)
    return X

def classify_customers(df):
    """Classify customers as RICH or POOR and build a classifier."""
    df['Class'] = np.where(df['Payment (Rs)'] > 200, 'RICH', 'POOR')

    # Prepare features and labels
    features = df[['Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)']]
    labels = df['Class']

    # Handle missing values and infinities
    if features.isnull().values.any():
        features = features.fillna(0)

    if np.isinf(features).values.any():
        features.replace([np.inf, -np.inf], 0, inplace=True)

    X = features.values
    y = labels.values

    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

    # Train a logistic regression model
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Return classification report with zero_division parameter
    return classification_report(y_test, y_pred, zero_division=1)

def main():
    """Main function to execute the tasks."""
    file_path = "/content/Lab Session Data.xlsx"
    sheet_name = 'Purchase data'
    df = load_data(file_path, sheet_name)

    # Create the full matrix A and C
    A_full = df[['Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)']].values
    C_full = df[['Payment (Rs)']].values

    # Print full matrix results
    print("Full Matrix Analysis:")
    print("\nMatrix A (Full):")
    print(A_full)
    print("\nMatrix C (Full):")
    print(C_full)

    dim_A_full = dimensions(A_full)
    num_vectors_full = count_vectors(A_full)
    rank_A_full = rank_of_matrix(A_full)
    X_full = compute_costs(A_full, C_full)

    print(f"\nDimensionality of the vector space (Full): {dim_A_full}")
    print(f"Number of vectors in this vector space (Full): {num_vectors_full}")
    print(f"Rank of Matrix A (Full): {rank_A_full}")
    print("\nCost of each product available for sale (Full):")
    print(X_full)

    # Create square matrices by selecting a subset of rows and columns
    size = min(A_full.shape[0], A_full.shape[1])  # Ensure we create square matrices within bounds
    A_square1 = A_full[:size, :size]
    C_square1 = C_full[:size]

    if A_full.shape[0] > 2*size and A_full.shape[1] > 2*size:
        A_square2 = A_full[size:2*size, size:2*size]
        C_square2 = C_full[size:2*size]
    else:
        A_square2 = np.zeros((size, size))
        C_square2 = np.zeros((size, 1))

    # Print square matrix results
    print("\nSquare Matrix 1 Analysis:")
    print("\nMatrix A (Square 1):")
    print(A_square1)
    print("\nMatrix C (Square 1):")
    print(C_square1)

    dim_A_square1 = dimensions(A_square1)
    num_vectors_square1 = count_vectors(A_square1)
    rank_A_square1 = rank_of_matrix(A_square1)
    X_square1 = compute_costs(A_square1, C_square1)

    print(f"\nDimensionality of the vector space (Square 1): {dim_A_square1}")
    print(f"Number of vectors in this vector space (Square 1): {num_vectors_square1}")
    print(f"Rank of Matrix A (Square 1): {rank_A_square1}")
    print("\nCost of each product available for sale (Square 1):")
    print(X_square1)

    # Print results for the second square matrix
    print("\nSquare Matrix 2 Analysis:")
    print("\nMatrix A (Square 2):")
    print(A_square2)
    print("\nMatrix C (Square 2):")
    print(C_square2)

    dim_A_square2 = dimensions(A_square2)
    num_vectors_square2 = count_vectors(A_square2)
    rank_A_square2 = rank_of_matrix(A_square2)
    X_square2 = compute_costs(A_square2, C_square2)

    print(f"\nDimensionality of the vector space (Square 2): {dim_A_square2}")
    print(f"Number of vectors in this vector space (Square 2): {num_vectors_square2}")
    print(f"Rank of Matrix A (Square 2): {rank_A_square2}")
    print("\nCost of each product available for sale (Square 2):")
    print(X_square2)

    # Compare results
    print("\nComparison of Costs:")
    print("Costs from Full Matrix:")
    print(X_full)
    print("\nCosts from Square Matrix 1:")
    print(X_square1)
    print("\nCosts from Square Matrix 2:")
    print(X_square2)

    # Classify customers
    classification_report_str = classify_customers(df)
    print("\nClassification Report:")
    print(classification_report_str)

if __name__ == "__main__":
    main()


Full Matrix Analysis:

Matrix A (Full):
[[20  6  2]
 [16  3  6]
 [27  6  2]
 [19  1  2]
 [24  4  2]
 [22  1  5]
 [15  4  2]
 [18  4  2]
 [21  1  4]
 [16  2  4]]

Matrix C (Full):
[[386]
 [289]
 [393]
 [110]
 [280]
 [167]
 [271]
 [274]
 [148]
 [198]]

Dimensionality of the vector space (Full): (10, 3)
Number of vectors in this vector space (Full): 10
Rank of Matrix A (Full): 3

Cost of each product available for sale (Full):
[[ 1.]
 [55.]
 [18.]]

Square Matrix 1 Analysis:

Matrix A (Square 1):
[[20  6  2]
 [16  3  6]
 [27  6  2]]

Matrix C (Square 1):
[[386]
 [289]
 [393]]

Dimensionality of the vector space (Square 1): (3, 3)
Number of vectors in this vector space (Square 1): 3
Rank of Matrix A (Square 1): 3

Cost of each product available for sale (Square 1):
[[ 1.]
 [55.]
 [18.]]

Square Matrix 2 Analysis:

Matrix A (Square 2):
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

Matrix C (Square 2):
[[0.]
 [0.]
 [0.]]

Dimensionality of the vector space (Square 2): (3, 3)
Number of vectors in thi