In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import os
from sklearn.model_selection import train_test_split
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

## Bayes Features MLE

In [2]:
data_path = 'data.csv'
data = pd.read_csv(data_path)
main_folder = 'data'

def load_frames(video_id):
    folder = os.path.join(main_folder, str(video_id))
    frames = []
    for img in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, img)
        img = Image.open(img_path)
        frames.append(np.array(img).flatten())
    return np.array(frames).flatten()  

features = []
labels = []
for index, row in data.iterrows():
    video_id = row['video_id']
    label_id = row['label_id']
    feature_vector = load_frames(video_id)
    features.append(feature_vector)
    labels.append(label_id)

X = np.array(features)
y = np.array(labels)

In [3]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5)

In [2]:
def estimate_params(features, labels):
    params = {}
    unique_labels = np.unique(labels)
    for label in unique_labels:
        class_features = features[labels == label]
        params[label] = {
            'mean': np.mean(class_features, axis=0),
            'std': np.std(class_features, axis=0)
        }
    return params

In [5]:
params = estimate_params(X_train, y_train)

In [3]:
def bayesian_classifier(test_features, params, class_priors):
    posteriors = []
    for label, stats in params.items():
        prior = class_priors[label]
        log_likelihood = np.sum(norm.logpdf(test_features, stats['mean'], stats['std']))
        posterior = log_likelihood + np.log(prior)
        posteriors.append((label, posterior))
    
    return max(posteriors, key=lambda x: x[1])[0]

In [7]:
class_priors = {label: np.mean(y_train == label) for label in np.unique(y_train)}

In [8]:
predicted_labels = [bayesian_classifier(feat, params, class_priors) for feat in X_val]

accuracy = np.mean(predicted_labels == y_val)
print(f'Validation Accuracy: {accuracy*100}')

Validation Accuracy: 15.160703456640388


## Bayes Featurs MLE with Normalization

In [4]:
data_path = 'data.csv'
data = pd.read_csv(data_path)
main_folder = 'data'

def load_frames(video_id):
    folder = os.path.join(main_folder, str(video_id))
    frames = []
    for img in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, img)
        img = Image.open(img_path)
        img = np.array(img) / 255
        frames.append(img.flatten())
    return np.array(frames).flatten()  

features = []
labels = []
for index, row in data.iterrows():
    video_id = row['video_id']
    label_id = row['label_id']
    feature_vector = load_frames(video_id)
    features.append(feature_vector)
    labels.append(label_id)

X = np.array(features)
y = np.array(labels)

In [6]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5)

In [7]:
params = estimate_params(X_train, y_train)

In [8]:
class_priors = {label: np.mean(y_train == label) for label in np.unique(y_train)}

In [9]:
predicted_labels = [bayesian_classifier(feat, params, class_priors) for feat in X_val]

accuracy = np.mean(predicted_labels == y_val)
print(f'Validation Accuracy: {accuracy*100}')

Validation Accuracy: 14.918132201334142


: 

## Bayes Feature MLE on GrayScale

In [12]:
data_path = 'data.csv'
data = pd.read_csv(data_path)
main_folder = 'data'

def load_frames(video_id):
    folder = os.path.join(main_folder, str(video_id))
    frames = []
    for img in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, img)
        img = Image.open(img_path).convert('L')
        frames.append(np.array(img).flatten())
    return np.array(frames).flatten()  

features = []
labels = []
for index, row in data.iterrows():
    video_id = row['video_id']
    label_id = row['label_id']
    feature_vector = load_frames(video_id)
    features.append(feature_vector)
    labels.append(label_id)

X = np.array(features)
y = np.array(labels)

In [13]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5)

In [14]:
params = estimate_params(X_train, y_train)

In [15]:
class_priors = {label: np.mean(y_train == label) for label in np.unique(y_train)}

In [16]:
predicted_labels = [bayesian_classifier(feat, params, class_priors) for feat in X_val]

accuracy = np.mean(predicted_labels == y_val)
print(f'Validation Accuracy: {accuracy*100}')

Validation Accuracy: 14.493632504548213


## Bayes Feature MLE on Mean

In [17]:
data_path = 'data.csv'
data = pd.read_csv(data_path)
main_folder = 'data'

def load_frames(video_id):
    folder = os.path.join(main_folder, str(video_id))
    frames = []
    for img in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, img)
        img = Image.open(img_path).convert('L')
        img_array = np.array(img).flatten()
        frames.append(img_array)
        avg_frame = np.mean(frames, axis=0)
    return avg_frame.flatten()  

features = []
labels = []
for index, row in data.iterrows():
    video_id = row['video_id']
    label_id = row['label_id']
    feature_vector = load_frames(video_id)
    features.append(feature_vector)
    labels.append(label_id)

X = np.array(features)
y = np.array(labels)

In [18]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5)

In [19]:
params = estimate_params(X_train, y_train)

In [20]:
class_priors = {label: np.mean(y_train == label) for label in np.unique(y_train)}

In [21]:
predicted_labels = [bayesian_classifier(feat, params, class_priors) for feat in X_val]

accuracy = np.mean(predicted_labels == y_val)
print(f'Validation Accuracy: {accuracy*100}')

Validation Accuracy: 15.463917525773196


## Bayes Feature MLE with LDA

In [None]:
def extract_features(video_id, base_path, lda):
    features = []
    folder_path = os.path.join(base_path, str(video_id))
    if os.path.exists(folder_path):
        for frame in os.listdir(folder_path):
            frame_path = os.path.join(folder_path, frame)
            img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, (32, 32)) 
                features.append(img.flatten())
    
    if features:
        features = np.array(features)
        lda_features = lda.transform(features)
        lda_features = np.mean(lda_features, axis=0)
        return lda_features
    else:
        return np.zeros(lda.n_components)

In [None]:
data = pd.read_csv('data.csv')
base_path = 'data'

In [None]:
def train_lda(X, y, n_components=7):
    lda = LinearDiscriminantAnalysis(n_components=n_components)
    lda.fit(X, y)
    return lda

all_features = []
all_labels = []
for _, row in data.iterrows():
    video_id = row['video_id']
    label = row['label_id']
    folder_path = os.path.join(base_path, str(video_id))
    if os.path.exists(folder_path):
        for frame in os.listdir(folder_path):
            frame_path = os.path.join(folder_path, frame)
            img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, (32, 32))
                all_features.append(img.flatten())
                all_labels.append(label)

all_features = np.array(all_features)
all_labels = np.array(all_labels)


lda = train_lda(all_features, all_labels)

In [None]:
data['features'] = data['video_id'].apply(lambda x: extract_features(x, base_path, lda))
X = np.vstack(data['features'].values)
y = data['label_id'].values

In [None]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5)

In [None]:
params = estimate_params(X_train, y_train)

In [None]:
predicted_labels = [bayesian_classifier(feat, params, class_priors) for feat in X_val]

accuracy = np.mean(predicted_labels == y_val)
print(f'Validation Accuracy: {accuracy*100}')

## Mean and Variance of Error Rate of Bayes Classifier

In [None]:
def error_mean_var(X, y, n_iterations=10, test_size=0.2):
    error_rates = []
    for i in range(n_iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

        bayes = BayesClassifier(X_train, y_train)
        bayes.fit()
        y_pred = bayes.predict(X_test)
        error_rate = 1 - np.mean(y_pred == y_test)
        error_rates.append(error_rate)

        print ("Iteration:", i+1, "Error Rate:", error_rate)
    
    mean_error_rate = np.mean(error_rates)
    variance_error_rate = np.var(error_rates)
    
    return mean_error_rate, variance_error_rate


mean_error_rate, variance_error_rate = error_mean_var(X, y)

print("Mean of Error Rate:", mean_error_rate)
print("Variance of Error Rate:", variance_error_rate)