In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, classification_report

# Generate a synthetic dataset with three numerical features
data_frame = pd.DataFrame({
    'Feature_X': np.random.rand(100) * 10,
    'Feature_Y': np.random.rand(100) * 5,
    'Feature_Z': np.random.rand(100) * 20,
})

# Create a binary target variable based on Feature_X and Feature_Y
data_frame['Target_Class'] = (data_frame['Feature_X'] + data_frame['Feature_Y'] > 7).astype(int)

# Define independent variables (features) and dependent variable (target class)
predictors = data_frame[['Feature_X', 'Feature_Y', 'Feature_Z']]
target = data_frame['Target_Class']

# Split the dataset into training and testing subsets
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.2, random_state=42)

# Initialize and train the logistic regression model
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_logistic = logistic_model.predict(X_test)

# Evaluate logistic regression model performance
logistic_accuracy = accuracy_score(y_test, y_pred_logistic)
logistic_report = classification_report(y_test, y_pred_logistic)

print(f"Logistic Regression Accuracy: {logistic_accuracy:.4f}")
print("Classification Report:\n", logistic_report)

# Predict the class for a new set of inputs
sample_input = np.array([[5, 2, 10]])  # Example input values
predicted_class_logistic = logistic_model.predict(sample_input)
print(f"Predicted class for input {sample_input}: {predicted_class_logistic[0]}")

# Initialize and train the Linear Discriminant Analysis (LDA) model
lda_model = LDA()
lda_model.fit(X_train, y_train)
y_pred_lda = lda_model.predict(X_test)
lda_accuracy = accuracy_score(y_test, y_pred_lda)
print(f"LDA Accuracy: {lda_accuracy:.4f}")

# LDA using matrix operations
X_train_df = pd.DataFrame(X_train, columns=['Feature_X', 'Feature_Y', 'Feature_Z'])
y_train_df = pd.Series(y_train)

mean_vectors = []
for cls in np.unique(y_train):
    mean_vectors.append(np.mean(X_train_df[y_train_df == cls], axis=0))

S_W = np.zeros((X_train.shape[1], X_train.shape[1]))
for cls, mean_vec in zip(np.unique(y_train), mean_vectors):
    class_scatter = np.zeros((X_train.shape[1], X_train.shape[1]))
    subset = X_train_df[y_train_df == cls].values
    mean_vec = mean_vec.to_numpy().reshape(-1, 1)
    for row in subset:
        row = row.reshape(-1, 1)
        class_scatter += (row - mean_vec).dot((row - mean_vec).T)
    S_W += class_scatter

mean_overall = np.mean(X_train_df, axis=0).to_numpy().reshape(-1, 1)
S_B = np.zeros((X_train.shape[1], X_train.shape[1]))
for cls, mean_vec in zip(np.unique(y_train), mean_vectors):
    n = X_train_df[y_train_df == cls].shape[0]
    mean_vec_arr = mean_vec.to_numpy().reshape(-1, 1)
    S_B += n * (mean_vec_arr - mean_overall).dot((mean_vec_arr - mean_overall).T)

# Compute eigenvalues and eigenvectors for the LDA transformation
eigen_values, eigen_vectors = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))

print("Eigenvalues:", eigen_values)
print("Eigenvectors:\n", eigen_vectors)


Logistic Regression Accuracy: 1.0000
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

Predicted class for input [[ 5  2 10]]: 1
LDA Accuracy: 1.0000
Eigenvalues: [ 2.47611942e+00 -4.28056907e-18  3.56704885e-17]
Eigenvectors:
 [[ 0.65539459  0.13541457 -0.35944165]
 [ 0.75485981 -0.49155364  0.92146948]
 [-0.02538911  0.86025456  0.14729462]]


