## Imports

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [4]:
# Defining the Shuffling Function
def my_shuffle_data(X, y):
    idx = np.arange(X.shape[0])
    np.random.shuffle(idx)
    return X.iloc[idx], y.iloc[idx]

# Claculatig the Accuracy Function
def calculate_accuracy(predictions, y):
    return np.mean(predictions == y) * 100

# Defining the Linear Classifier Function
def linear_classifier(x_train, y_train):
        b = np.ones((x_train.shape[0], 1))
        x = np.hstack((b, x_train))
        w = np.dot(np.dot(np.linalg.inv(np.dot(x.T, x)), x.T), y_train)
        return w

# Predicting the Labels Function
def predict_labels(x, w):
    b = np.ones((x.shape[0], 1))
    x = np.hstack((b, x))
    y = np.dot(x, w)
    predictions = np.where(y >= 0.5, 1, 0)
    return predictions

# Feature Reduction using LDA (Linear Discriminant Analysis)
def feature_reduction(X, y, num_components):
    lda = LinearDiscriminantAnalysis(n_components=num_components)
    X_reduced = lda.fit_transform(X, y)
    return X_reduced

## Applying the LDA on the features of the dataset

In [5]:
# Reading the Dataset
data = pd.read_csv('/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv')

# Splitting the Data into Features and Labels
X = data.drop('output', axis=1)
y = data['output']

# Calculating the Mean and Standard Deviation of the Features
mean_features = X.mean()
stddev_features = X.std()

# Concatenate Mean and Standard Deviation Features with Original Features
X_mean_stddev = pd.concat([mean_features, stddev_features], axis=0)
X_mean_stddev.columns = ['mean', 'stddev']

# Replicating the Mean and Standard Deviation Features for all Samples
X_mean_stddev = np.tile(X_mean_stddev.values.flatten(), (X.shape[0], 1))

# Applying the LDA for Feature Reduction
num_components = 1
X_reduced = pd.DataFrame(feature_reduction(X_mean_stddev, y, num_components))

# Splitting the Data into Training and Testing Sets
x_train, x_test, y_train, y_test = train_test_split(X_reduced, y, test_size=0.2)

# Calculating the Weights
best_w = linear_classifier(x_train, y_train)

# Predicting the Labels
y_pred = predict_labels(x_test, best_w)

# Calculating the Accuracy
accuracy = calculate_accuracy(y_pred, y_test)
print("The accuracy is:", accuracy)

The accuracy is: 57.377049180327866


## Applying the LDA directly on the dataset

In [6]:
# Reading the Dataset
data = pd.read_csv('/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv')

# Splitting the Data into Features and Labels
X = data.drop('output', axis=1)
y = data['output']

# Applying the LDA for Feature Reduction
num_components = 1
X_reduced = pd.DataFrame(feature_reduction(X, y, num_components))

# Splitting the Data into Training and Testing Sets
x_train, x_test, y_train, y_test = train_test_split(X_reduced, y, test_size=0.2)

# Calculating the Weights
best_w = linear_classifier(x_train, y_train)

# Predicting the Labels
y_pred = predict_labels(x_test, best_w)

# Calculating the Accuracy
accuracy = calculate_accuracy(y_pred, y_test)
print("The accuracy is:", accuracy)

The accuracy is: 93.44262295081968
