In [5]:
#import needed packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import sklearn as sk
import scipy.io as sio



In [6]:
#Function Section

from scipy.stats import norm

def gaussian_kernel(x, x_i, bandwidth):
    distance = np.linalg.norm(x - x_i)
    kernel = np.exp(-distance**2/(2*bandwidth))
    return kernel

def kde(x, data, bandwidth):
    n = len(data)
    kde_estimate = np.zeros_like(x)
    for i in range(n):
        kde_estimate += gaussian_kernel(x, data[i], bandwidth)
    return kde_estimate / n

def compute_loocv(data, bandwidth):
    loocv_errors = []
    for i in range(len(data)):
        leave_out_data = np.delete(data, i)  # leave out data point at index i
        kde_estimate = kde(data[i], leave_out_data, bandwidth)
        squared_error = (kde_estimate - gaussian_kernel(data[i], data[i], bandwidth))**2
        loocv_errors.append(squared_error)
    return np.mean(loocv_errors)


In [7]:
#Load the data

#load data from matrix file, anomaly

dataset = sio.loadmat('Data/anomaly.mat')
X = dataset['X']
xtest1 = dataset['xtest1'][0][0]
xtest2 = dataset['xtest2'][0][0]


#from this file we have the X value, but we need to find the density function f

print(f'xtest point1: {xtest1}')
print(f'xtest point2: {xtest2}')



xtest point1: 2.85
xtest point2: -1.5


In [10]:
#Part A) Figure out the bandwidth parameter first

bandwidth_vals = np.linspace(0.1,4,100)

loocv_erros = [compute_loocv(X,bandwidth) for bandwidth in bandwidth_vals]

opt_band_width = bandwidth_vals[np.argmin(loocv_erros)]

print(loocv_erros)
print(opt_band_width)

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
0.1
