In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA

In [None]:


dataset = pd.read_csv('dataset.csv')

# Every column except the last is a feature; the last column holds the label.
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# For each class seen in the training split, store the per-feature
# (mean, std) of the training rows that carry that label.
classes = y_train.unique()
class_conditional_likelihoods = {}
for class_label in classes:
    X_class = X_train[y_train == class_label]
    class_conditional_likelihoods[class_label] = {
        feature: (X_class[feature].mean(), X_class[feature].std())
        for feature in X_train.columns
    }

def predict(X):
    """Predict a class label for every row of ``X``.

    Each class in the module-level ``classes`` is scored with the sum, over
    features, of the log of a univariate Gaussian density whose
    ``(mean, std)`` is read from the module-level
    ``class_conditional_likelihoods``. The class with the largest score is
    chosen; no class prior is added to the score.

    Parameters
    ----------
    X : pandas.DataFrame (or any object exposing ``iterrows()``)
        Rows to classify. Each row must be indexable by every feature name
        stored in ``class_conditional_likelihoods``.

    Returns
    -------
    list
        One predicted label per row, in row order. An entry is ``None`` when
        no class produced a score greater than ``-inf`` (for example when the
        row contains NaN).
    """
    eps = 1e-6  # stand-in for a degenerate std, to avoid division by 0

    # The Gaussian parameters do not depend on the row, so resolve them (and
    # the log-normaliser) once instead of once per row.
    params_by_class = []
    for class_label in classes:
        feature_params = []
        for feature, (mean, std) in class_conditional_likelihoods[class_label].items():
            # std is 0 for a constant feature and NaN when the class had a
            # single training row (sample std is undefined); a NaN would make
            # the whole class score NaN, so the class could never be chosen.
            if std == 0 or np.isnan(std):
                std = eps
            log_norm = np.log(1 / (np.sqrt(2 * np.pi) * std))
            feature_params.append((feature, mean, std, log_norm))
        params_by_class.append((class_label, feature_params))

    predictions = []
    for _, row in X.iterrows():
        max_likelihood = -np.inf
        predicted_class = None
        for class_label, feature_params in params_by_class:
            class_likelihood = 0
            for feature, mean, std, log_norm in feature_params:
                exponent = -(row[feature] - mean) ** 2 / (2 * std ** 2)
                class_likelihood += log_norm + exponent
            # Strict '>' keeps the first class on ties.
            if class_likelihood > max_likelihood:
                max_likelihood = class_likelihood
                predicted_class = class_label
        predictions.append(predicted_class)
    return predictions

# Classify every row of the held-out split with the single-argument predict()
# and report the fraction of labels that match y_test.
y_pred = predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.9291666666666667


In [None]:

def predict_one_instance(instance):
    """Predict the class label of a single sample.

    Scores every class in the module-level ``classes`` with the sum, over
    features, of the log of a univariate Gaussian density whose
    ``(mean, std)`` comes from the module-level
    ``class_conditional_likelihoods``, and returns the best-scoring class.
    No class prior is added to the score.

    Parameters
    ----------
    instance : pandas.Series or mapping
        The sample; must be indexable by every feature name stored in
        ``class_conditional_likelihoods``.

    Returns
    -------
    The winning class label, or ``None`` when no class produced a score
    greater than ``-inf`` (for example when the sample contains NaN).
    """
    eps = 1e-6  # stand-in for a degenerate std, to avoid division by 0
    best_label = None
    best_score = -np.inf
    for class_label in classes:
        score = 0
        for feature, (mean, std) in class_conditional_likelihoods[class_label].items():
            # std is 0 for a constant feature and NaN when the class had a
            # single training row (sample std is undefined); a NaN would make
            # the whole class score NaN, so the class could never be chosen.
            if std == 0 or np.isnan(std):
                std = eps
            exponent = -(instance[feature] - mean) ** 2 / (2 * std ** 2)
            score += np.log(1 / (np.sqrt(2 * np.pi) * std)) + exponent
        # Strict '>' keeps the first class on ties.
        if score > best_score:
            best_score = score
            best_label = class_label
    return best_label

# Spot-check: classify the first row of the test split and print the
# prediction next to the label stored at the same position in y_test.
x_input = X_test.iloc[0]
predicted_output = predict_one_instance(x_input)
actual_output = y_test.iloc[0]
print("Predicted Output:", predicted_output)
print("Actual Output:", actual_output)


Predicted Output: vocal
Actual Output: vocal


PCA + K-Fold Cross-Validation

In [None]:
def predict(X, class_conditional_likelihoods):
    """Predict a class label for every row of ``X``.

    Each class is scored with the sum, over features, of the log of a
    univariate Gaussian density, and the class with the largest score is
    chosen. No class prior is added to the score.

    Parameters
    ----------
    X : pandas.DataFrame (or any object exposing ``iterrows()``)
        Rows to classify. Each row must be indexable by every feature name
        used in ``class_conditional_likelihoods``.
    class_conditional_likelihoods : dict
        Maps class label -> {feature name -> (mean, std)}. Classes are
        evaluated in the dict's iteration order.

    Returns
    -------
    list
        One predicted label per row, in row order. An entry is ``None`` when
        no class produced a score greater than ``-inf`` (for example when the
        row contains NaN, or when the mapping is empty).
    """
    eps = 1e-6  # stand-in for a degenerate std, to avoid division by 0

    # The Gaussian parameters do not depend on the row, so resolve them (and
    # the log-normaliser) once instead of once per row.
    params_by_class = []
    for class_label, feature_stats in class_conditional_likelihoods.items():
        feature_params = []
        for feature, (mean, std) in feature_stats.items():
            # std is 0 for a constant feature and NaN when the class had a
            # single training row (sample std is undefined); a NaN would make
            # the whole class score NaN, so the class could never be chosen.
            if std == 0 or np.isnan(std):
                std = eps
            log_norm = np.log(1 / (np.sqrt(2 * np.pi) * std))
            feature_params.append((feature, mean, std, log_norm))
        params_by_class.append((class_label, feature_params))

    predictions = []
    for _, row in X.iterrows():
        max_likelihood = -np.inf
        predicted_class = None
        for class_label, feature_params in params_by_class:
            class_likelihood = 0
            for feature, mean, std, log_norm in feature_params:
                exponent = -(row[feature] - mean) ** 2 / (2 * std ** 2)
                class_likelihood += log_norm + exponent
            # Strict '>' keeps the first class on ties.
            if class_likelihood > max_likelihood:
                max_likelihood = class_likelihood
                predicted_class = class_label
        predictions.append(predicted_class)
    return predictions


In [None]:
min_components = 10  # Adjust as needed
pca = PCA(n_components=min_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# PCA returns a plain array, while predict() iterates rows with iterrows()
# and looks features up by name, and the class masks below align on the
# index. Wrap the projection in a DataFrame that reuses X_train's index.
pca_columns = [f"PC{i + 1}" for i in range(X_train_pca.shape[1])]
X_train_pca_df = pd.DataFrame(X_train_pca, index=X_train.index, columns=pca_columns)

num_folds = 6
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
cv_scores = []

# Cross-validate on the PCA projection. Previously the folds were cut from
# the raw X_train, so the fitted PCA never influenced the scores.
# NOTE: the projection is fitted on the whole training split before folding,
# so each validation fold has contributed to the components it is scored on.
for train_index, val_index in kf.split(X_train_pca_df):
    X_train_fold, X_val_fold = X_train_pca_df.iloc[train_index], X_train_pca_df.iloc[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

    # class-conditional likelihoods: per-class, per-component (mean, std)
    class_conditional_likelihoods_fold = {}
    classes_fold = y_train_fold.unique()
    for class_label in classes_fold:
        class_conditional_likelihoods_fold[class_label] = {}
        X_class_fold = X_train_fold[y_train_fold == class_label]
        for feature in X_train_fold.columns:
            mean = X_class_fold[feature].mean()
            std = X_class_fold[feature].std()
            class_conditional_likelihoods_fold[class_label][feature] = (mean, std)

    y_val_pred = predict(X_val_fold, class_conditional_likelihoods_fold)
    accuracy_fold = accuracy_score(y_val_fold, y_val_pred)
    cv_scores.append(accuracy_fold)

print("Average Cross-Validation Accuracy:", np.mean(cv_scores))
print("Highest Cross-Validation Accuracy:", np.max(cv_scores))

Average Cross-Validation Accuracy: 0.9041666666666667
Highest Cross-Validation Accuracy: 0.925


In [None]:
# Hand-recorded best cross-validation accuracy.
# NOTE(review): the two entries look like the same score written once as a
# fraction (0.925) and once as a percentage (92.5) - confirm which scale is
# intended before appending further results to this list.
max_cv_accuracy_scores = [0.925,92.5]

In [None]:
num_components = 50  # total comp=60
pca = PCA(n_components=num_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# PCA returns plain arrays, while predict() iterates rows with iterrows() and
# looks features up by name, and the class masks below align on the index.
# Wrap both projections in DataFrames that reuse the original indices.
pca_columns = [f"PC{i + 1}" for i in range(X_train_pca.shape[1])]
X_train_pca_df = pd.DataFrame(X_train_pca, index=X_train.index, columns=pca_columns)
X_test_pca_df = pd.DataFrame(X_test_pca, index=X_test.index, columns=pca_columns)

# class-conditional likelihoods on the PCA components.
# Previously these were computed from the raw X_train, so the PCA above had
# no effect on the reported accuracy. They are stored under a separate name
# so the raw-feature statistics held in `class_conditional_likelihoods`
# (read by the earlier prediction helpers) are not overwritten.
class_conditional_likelihoods_pca = {}
for class_label in y_train.unique():
    X_class = X_train_pca_df[y_train == class_label]
    class_conditional_likelihoods_pca[class_label] = {
        feature: (X_class[feature].mean(), X_class[feature].std())
        for feature in pca_columns
    }

# Predict on the held-out test split (projected with the same PCA)
y_val_pred = predict(X_test_pca_df, class_conditional_likelihoods_pca)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_val_pred)

print("Accuracy:", accuracy)


Accuracy: 0.9291666666666667
