# import libraries

In [1]:
import pandas as pd
import numpy as np

# read dataset

In [2]:
# Load the iris dataset from a CSV file located next to the notebook.
df = pd.read_csv('iris_dataset.csv')
# Show the first row transposed so each column name and its value is listed vertically.
df.head(1).T

Unnamed: 0,0
sepal length (cm),5.1
sepal width (cm),3.5
petal length (cm),1.4
petal width (cm),0.2
target,0.0


# separate features and labels

In [3]:
# Feature table: the full frame minus the class-label column.
X = df.drop('target', axis='columns')
# Peek at the first sample, one feature per line.
X.iloc[:1].T

Unnamed: 0,0
sepal length (cm),5.1
sepal width (cm),3.5
petal length (cm),1.4
petal width (cm),0.2


In [4]:
# Label table: a one-column DataFrame (not a Series) holding only 'target'.
Y = df.loc[:, ['target']]
# Peek at the first label.
Y.iloc[:1].T

Unnamed: 0,0
target,0


In [5]:
# Per-class summary statistics for every feature column.
# Result: one row per class id (0, 1, 2) and, for each feature, the columns
# "<feature>_mean", "<feature>_std", "<feature>_min", "<feature>_max" and
# "<feature>_range" (max - min), in that order.
stat_rows = []

for i in range(3):
    # The class filter does not depend on the feature, so apply it once per class
    # instead of rebuilding the mask for every column.
    class_rows = df.loc[df['target'] == i]
    row = {}
    # Every column except the last one (the 'target' label) is treated as a feature.
    for col in df.columns[:-1]:
        values = class_rows[col]
        low = values.min()
        high = values.max()
        row[f"{col}_mean"] = values.mean()
        row[f"{col}_std"] = values.std()
        row[f"{col}_min"] = low
        row[f"{col}_max"] = high
        row[f"{col}_range"] = high - low
    stat_rows.append(row)

# Build the frame in one go rather than enlarging it one scalar at a time via .loc.
statistics_df = pd.DataFrame(stat_rows, index=list(range(3)))

In [6]:
# Transpose for display: one row per statistic, one column per class.
statistics_df.T

Unnamed: 0,0,1,2
sepal length (cm)_mean,5.006,5.936,6.588
sepal length (cm)_std,0.35249,0.516171,0.63588
sepal length (cm)_min,4.3,4.9,4.9
sepal length (cm)_max,5.8,7.0,7.9
sepal length (cm)_range,1.5,2.1,3.0
sepal width (cm)_mean,3.428,2.77,2.974
sepal width (cm)_std,0.379064,0.313798,0.322497
sepal width (cm)_min,2.3,2.0,2.2
sepal width (cm)_max,4.4,3.4,3.8
sepal width (cm)_range,2.1,1.4,1.6


# find class probabilities
* define gaussian function
* extract features
* run gaussian function
* find class with max prob
* convert class numbers to class names
* calculate precision

In [7]:
def gaussian(x, mu, sigma):
    """Evaluate the normal probability density with mean ``mu`` and standard deviation ``sigma``.

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a NumPy array.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        The density value(s); NumPy broadcasting determines the result shape.
    """
    normaliser = 1 / (np.sqrt(2 * np.pi) * sigma)
    z_score = (x - mu) / sigma
    return normaliser * np.exp(-0.5 * z_score**2)

In [8]:
# Take the petal-length column as a 2-D NumPy array (a single column).
X = df[['petal length (cm)']].to_numpy()
print(X.shape)

# Select that one column to get a flat 1-D vector of petal lengths.
x_vals = X[:, 0]
print(x_vals.shape)

(150, 1)
(150,)


In [9]:
def apply_guassian(x_vals, n_classes, cols, stats=None):
    """Evaluate the per-class Gaussian density of every sample for each feature.

    Args:
        x_vals: Sample values. A 2-D array whose column ``i`` holds the values
            for ``cols[i]``; a 1-D array is treated as a single feature column.
        n_classes: Number of classes; class ids are taken to be ``0 .. n_classes - 1``
            and are used as row labels into ``stats``.
        cols: Feature names, in the same order as the columns of ``x_vals``.
            Each name must have "<name>_mean" and "<name>_std" columns in ``stats``.
        stats: Table of per-class statistics indexed by class id. Defaults to the
            notebook-level ``statistics_df`` so existing calls keep working.

    Returns:
        Array of shape ``(n_samples, n_classes, len(cols))`` with the density of
        each sample under each class's Gaussian for each feature.
    """
    if stats is None:
        # Backward-compatible default: fall back to the notebook's global table.
        stats = statistics_df

    x_vals = np.asarray(x_vals)
    if x_vals.ndim == 1:
        # A flat vector is one feature column; the column indexing below needs 2-D.
        x_vals = x_vals[:, np.newaxis]

    n_samples = len(x_vals)
    n_cols = len(cols)
    gaussian_results = np.zeros((n_samples, n_classes, n_cols))

    for i_col, col in enumerate(cols):
        feature_values = x_vals[:, i_col]
        for cls in range(n_classes):
            mu = stats.loc[cls, f"{col}_mean"]
            sigma = stats.loc[cls, f"{col}_std"]
            gaussian_results[:, cls, i_col] = gaussian(feature_values, mu, sigma)

    return gaussian_results

In [10]:
# Likelihood of every training sample under each class, using petal length only.
# Drop just the trailing single-feature axis: a bare squeeze() would also collapse
# the sample axis if there were exactly one sample.
gaussian_results = apply_guassian(x_vals[:, np.newaxis], 3, ['petal length (cm)']).squeeze(axis=2)
gaussian_results.shape

(150, 3)

In [11]:
def predict_class(gaussian_results):
    """Pick the most likely class index for every sample.

    Args:
        gaussian_results: Either a 2-D array ``(n_samples, n_classes)`` of
            per-class scores, or a 3-D array ``(n_samples, n_classes, n_features)``
            of per-feature scores.

    Returns:
        1-D array of class indices, one per sample. For 2-D input this is the
        arg-max over classes. For any other input the two largest per-feature
        scores of each class are averaged first, and the arg-max is taken over
        those averages.
    """
    # Single-feature case: scores are already one value per class.
    if len(gaussian_results.shape) == 2:
        return np.argmax(gaussian_results, axis=1)

    # Multi-feature case: sort ascending along the feature axis so the two
    # largest scores sit at the end, then average them per class.
    ascending = np.sort(gaussian_results, axis=2)
    top_two_mean = np.mean(ascending[:, :, -2:], axis=2)
    return np.argmax(top_two_mean, axis=1)
# Predict a class index for every training sample from the petal-length likelihoods.
predicted_class = predict_class(gaussian_results)
predicted_class

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [12]:
def convert_labels(predicted_class):
    """Translate class indices into their colour names.

    Args:
        predicted_class: Iterable of class indices (0, 1 or 2).

    Returns:
        List of colour names ('blue', 'black', 'red'), one per input index.

    Raises:
        KeyError: If an index other than 0, 1 or 2 is encountered.
    """
    class_to_colour = {0: 'blue', 1: 'black', 2: 'red'}
    return list(map(class_to_colour.__getitem__, predicted_class))

# Map the predicted class indices to colour names for reporting.
predicted_labels = convert_labels(predicted_class)
predicted_labels

['blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'blue',
 'black',
 'black',
 'red',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'red',
 'black',
 'black',
 'black',
 'black',
 'red',
 'black',
 'black',
 'black',
 'black',
 'black',
 'red',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'black',
 'red',
 'red',
 'red',
 'red',
 'red',
 'red',
 'black',


In [13]:
def calculate_precision(predicted_labels, from_index, to_index, class_name):
    """Percentage of predictions in a slice that equal ``class_name``.

    The slice ``predicted_labels[from_index:to_index]`` is taken to be the
    samples whose true class is ``class_name``, so the result is the share of
    that class's samples that were predicted correctly.

    Args:
        predicted_labels: Sequence of predicted label names.
        from_index: Start of the slice (inclusive).
        to_index: End of the slice (exclusive).
        class_name: Label every prediction in the slice is compared against.

    Returns:
        Percentage in the range 0-100 as a float, or NaN when the slice is
        empty (the ratio is undefined in that case).
    """
    class_samples = predicted_labels[from_index:to_index]
    n_samples = len(class_samples)
    if n_samples == 0:
        # Nothing to score: avoid dividing by zero and report "undefined".
        return float('nan')
    n_matches = sum(label == class_name for label in class_samples)
    return (n_matches / n_samples) * 100

# Score each class on its own block of rows; the fixed 0-50 / 50-100 / 100-150
# ranges assume the dataset rows are grouped by class in that order.
class_blue_correct_answers = calculate_precision(predicted_labels, 0, 50, 'blue')
class_black_correct_answers = calculate_precision(predicted_labels, 50, 100, 'black')
class_red_correct_answers = calculate_precision(predicted_labels, 100, 150, 'red')

# One print call; sep/end supply the newline that follows each report line.
print(
    f'class_blue_correct_answers: {class_blue_correct_answers}%\n',
    f'class_black_correct_answers: {class_black_correct_answers}%\n',
    f'class_red_correct_answers: {class_red_correct_answers}%\n',
    sep='\n',
)

class_blue_correct_answers: 100.0%

class_black_correct_answers: 92.0%

class_red_correct_answers: 94.0%



# read test file and calculate precision

In [14]:
# Load the separate test samples from a CSV file located next to the notebook.
test_df = pd.read_csv('iris_test_samples.csv')
# Report (rows, columns) of the loaded frame.
test_df.shape

(10, 5)

In [15]:
# Inspect the column names of the test file.
test_df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label'], dtype='object')

In [16]:
# Keep only the feature used by the single-feature model plus the label.
test_df = test_df.loc[:, ['petal_length', 'label']]
test_df.shape

(10, 2)

In [17]:
# Split the test frame into a 1-D feature vector and a 1-D label vector.
X_test = test_df['petal_length'].to_numpy()
Y_test = test_df['label'].to_numpy()

# Print both arrays for a visual check.
print(f'X_test:\n{X_test}\n\n')
print(f'Y_test:\n{Y_test}\n\n')

X_test:
[1.5 1.4 1.7 4.  4.2 4.7 5.5 5.8 5.9 5.1]


Y_test:
[0 0 0 1 1 1 2 2 2 2]




In [18]:
# Single-feature evaluation on the test samples (petal length only).
# Drop just the trailing single-feature axis: a bare squeeze() would also collapse
# the sample axis if the test set held exactly one sample.
gaussian_results = apply_guassian(X_test[:, np.newaxis], 3, ['petal length (cm)']).squeeze(axis=2)
print(f"gaussian_results: {gaussian_results}\n")

predicted_class = predict_class(gaussian_results)
print(f"predicted_class: {predicted_class}\n")

predicted_labels = convert_labels(predicted_class)
print(f"predicted_labels: {predicted_labels}\n")

# The fixed 0-3 / 3-6 / 6-10 ranges mirror the ordering of Y_test printed above
# ([0 0 0 1 1 1 2 2 2 2]); they must be updated if the test file changes.
class_blue_correct_answers = calculate_precision(predicted_labels, 0, 3, 'blue')
class_black_correct_answers = calculate_precision(predicted_labels, 3, 6, 'black')
class_red_correct_answers = calculate_precision(predicted_labels, 6, 10, 'red')

print(f'class_blue_correct_answers: {class_blue_correct_answers}%\n')
print(f'class_black_correct_answers: {class_black_correct_answers}%\n')
print(f'class_red_correct_answers: {class_red_correct_answers}%\n')

gaussian_results: [[2.24286648e+000 2.74074461e-008 1.42495896e-012]
 [2.15537744e+000 7.67726912e-009 3.70613589e-013]
 [8.98175851e-001 3.04922265e-007 1.90892598e-011]
 [9.60648664e-047 7.28480142e-001 1.38623331e-002]
 [2.42708759e-054 8.42081823e-001 3.59667581e-002]
 [7.43660363e-076 5.47660394e-001 2.19553863e-001]
 [9.14952948e-118 2.61121870e-002 7.19657913e-001]
 [7.39853222e-136 3.95094512e-003 6.53440887e-001]
 [3.55134629e-142 1.92302481e-003 5.92537946e-001]
 [1.17055718e-095 1.71798248e-001 5.16893507e-001]]

predicted_class: [0 0 0 1 1 1 2 2 2 2]

predicted_labels: ['blue', 'blue', 'blue', 'black', 'black', 'black', 'red', 'red', 'red', 'red']

class_blue_correct_answers: 100.0%

class_black_correct_answers: 100.0%

class_red_correct_answers: 100.0%



# use two features

In [19]:
# Reload the test samples; an earlier cell narrowed test_df to a single feature.
test_df = pd.read_csv('iris_test_samples.csv')
# Keep the two petal measurements plus the label.
test_df = test_df[['petal_length', 'petal_width', 'label']]
# 2-D feature matrix (one column per feature) and 1-D label vector.
X_test = test_df[['petal_length', 'petal_width']].to_numpy()
Y_test = test_df['label'].to_numpy()

# Print both arrays for a visual check.
print(f'X_test:\n{X_test}\n\n')
print(f'Y_test:\n{Y_test}\n\n')

X_test:
[[1.5 0.2]
 [1.4 0.1]
 [1.7 0.4]
 [4.  1.3]
 [4.2 1.5]
 [4.7 1.4]
 [5.5 2. ]
 [5.8 2.2]
 [5.9 2.1]
 [5.1 1.9]]


Y_test:
[0 0 0 1 1 1 2 2 2 2]




In [20]:
# Two-feature evaluation: keep the full 3-D likelihood array so predict_class
# can combine the per-feature scores.
gaussian_results = apply_guassian(X_test, 3, ['petal length (cm)', 'petal width (cm)'])
print(f"gaussian_results.shape: {gaussian_results.shape}\n")

predicted_class = predict_class(gaussian_results)
print(f"predicted_class: {predicted_class}\n")

predicted_labels = convert_labels(predicted_class)
print(f"predicted_labels: {predicted_labels}\n")

# Fixed row ranges per class, mirroring the ordering of Y_test printed earlier.
class_blue_correct_answers = calculate_precision(predicted_labels, 0, 3, 'blue')
class_black_correct_answers = calculate_precision(predicted_labels, 3, 6, 'black')
class_red_correct_answers = calculate_precision(predicted_labels, 6, 10, 'red')

# One print call; sep/end supply the newline that follows each report line.
print(
    f'class_blue_correct_answers: {class_blue_correct_answers}%\n',
    f'class_black_correct_answers: {class_black_correct_answers}%\n',
    f'class_red_correct_answers: {class_red_correct_answers}%\n',
    sep='\n',
)

gaussian_results.shape: (10, 3, 2)

predicted_class: [0 0 0 1 1 1 2 2 2 2]

predicted_labels: ['blue', 'blue', 'blue', 'black', 'black', 'black', 'red', 'red', 'red', 'red']

class_blue_correct_answers: 100.0%

class_black_correct_answers: 100.0%

class_red_correct_answers: 100.0%



# use 4 features

In [None]:
# Reload the test samples and keep all four measurements plus the label.
# (This cell previously repeated the two-feature selection, which contradicted
# the "use 4 features" heading.)
test_df = pd.read_csv('iris_test_samples.csv')
test_df = test_df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label']]
# 2-D feature matrix with one column per measurement, and a 1-D label vector.
X_test = test_df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].to_numpy()
Y_test = test_df['label'].to_numpy()

# Print both arrays for a visual check.
print(f'X_test:\n{X_test}\n\n')
print(f'Y_test:\n{Y_test}\n\n')

