In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

def load_data(file_path, sheet_name):
    """Load one worksheet of an Excel workbook into a DataFrame.

    Args:
        file_path: Location of the workbook, forwarded to ``pd.read_excel``.
        sheet_name: Worksheet to read, forwarded as ``sheet_name``.

    Returns:
        The DataFrame produced by ``pd.read_excel``.

    Raises:
        FileNotFoundError: If ``pd.read_excel`` cannot find the file.
        ValueError: If ``pd.read_excel`` raises ``ValueError``.
        Exception: For any other failure while reading the workbook.

    Every re-raised exception is chained to the original one (``from``),
    so the underlying cause stays visible in the traceback.
    """
    # Keep the try body to the single call that can actually fail.
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Error: The file at path '{file_path}' was not found."
        ) from err
    except ValueError as ve:
        raise ValueError(f"Value Error: {ve}") from ve
    except Exception as e:
        raise Exception(f"An error occurred: {e}") from e
    return df

def dimensions(A):
    """Report the shape of ``A`` exactly as its ``shape`` attribute gives it."""
    shape = A.shape
    return shape

def count_vectors(A):
    """Count the vectors stored in ``A``, i.e. the size of its first axis."""
    shape = A.shape
    return shape[0]

def rank_of_matrix(A):
    """Compute the rank of ``A`` via ``np.linalg.matrix_rank``."""
    rank = np.linalg.matrix_rank(A)
    return rank

def compute_costs(A, C):
    """Solve for the costs by multiplying the pseudo-inverse of ``A`` with ``C``."""
    return np.linalg.pinv(A).dot(C)

def classify_customers(df):
    """Label customers as RICH or POOR and evaluate a logistic-regression classifier.

    A customer is labelled 'RICH' when 'Payment (Rs)' is greater than 200 and
    'POOR' otherwise. The purchase quantities are used as features, the data
    is split 70/30 with a fixed seed, and a logistic regression is trained on
    the training part and scored on the held-out part.

    Args:
        df: DataFrame containing the columns 'Candies (#)', 'Mangoes (Kg)',
            'Milk Packets (#)' and 'Payment (Rs)'.

    Returns:
        The text report produced by ``classification_report`` for the test split.

    Side effects:
        Adds (or overwrites) a 'Class' column on the DataFrame that was passed in.
    """
    feature_columns = ['Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)']

    # The label column is written onto the caller's DataFrame.
    df['Class'] = np.where(df['Payment (Rs)'] > 200, 'RICH', 'POOR')

    # Map +/-inf and NaN to 0. The chain builds a new frame instead of
    # modifying a column selection of df in place, which avoids pandas'
    # SettingWithCopyWarning.
    features = (
        df[feature_columns]
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0)
    )

    X = features.values
    y = df['Class'].values

    # Split BEFORE scaling so the held-out rows never influence the scaler.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Fit the scaler on the training rows only, then apply it to both parts.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train a logistic regression model
    model = LogisticRegression(max_iter=1000)  # Increase max_iter if necessary
    model.fit(X_train_scaled, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test_scaled)

    # zero_division=1 is passed so classes with no predicted samples do not
    # trigger the undefined-metric warning.
    return classification_report(y_test, y_pred, zero_division=1)

def main(file_path="/content/Lab Session Data.xlsx", sheet_name='Purchase data'):
    """Run the purchase-data analysis and print every result.

    Loads the worksheet, prints the selected columns, builds the quantity
    matrix A and the payment matrix C, prints the dimensionality, vector
    count and rank of A together with the per-product costs obtained from
    the pseudo-inverse, and finally prints the RICH/POOR classification
    report.

    Args:
        file_path: Path of the Excel workbook to read. Defaults to the
            location that was previously hard-coded.
        sheet_name: Worksheet to read. Defaults to 'Purchase data'.
    """
    df = load_data(file_path, sheet_name)

    # Selection of specific columns
    columns_to_print = df[['Customer', 'Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)', 'Payment (Rs)']]
    print("Selected columns:")
    print(columns_to_print)

    # Create matrices A (quantities purchased) and C (amount paid)
    A = df[['Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)']].values
    C = df[['Payment (Rs)']].values

    # Perform calculations
    dim_A = dimensions(A)
    num_vectors = count_vectors(A)
    rank_A = rank_of_matrix(A)
    X = compute_costs(A, C)

    print("\nMatrix A:")
    print(A)
    print("\nMatrix C:")
    print(C)
    # Each row of A is one vector, so the dimensionality of the space is the
    # number of columns, not the whole (rows, columns) shape tuple.
    print(f"\nDimensionality of the vector space: {dim_A[1]}")
    print(f"Number of vectors in this vector space: {num_vectors}")
    print(f"Rank of Matrix A: {rank_A}")
    print("\nCost of each product available for sale:")
    print(X)

    # Classify customers
    classification_report_str = classify_customers(df)
    print("\nClassification Report:")
    print(classification_report_str)

# Run the analysis only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


Selected columns:
  Customer  Candies (#)  Mangoes (Kg)  Milk Packets (#)  Payment (Rs)
0      C_1           20             6                 2           386
1      C_2           16             3                 6           289
2      C_3           27             6                 2           393
3      C_4           19             1                 2           110
4      C_5           24             4                 2           280
5      C_6           22             1                 5           167
6      C_7           15             4                 2           271
7      C_8           18             4                 2           274
8      C_9           21             1                 4           148
9     C_10           16             2                 4           198

Matrix A:
[[20  6  2]
 [16  3  6]
 [27  6  2]
 [19  1  2]
 [24  4  2]
 [22  1  5]
 [15  4  2]
 [18  4  2]
 [21  1  4]
 [16  2  4]]

Matrix C:
[[386]
 [289]
 [393]
 [110]
 [280]
 [167]
 [271]
 [274]
 [148]
 [198]