In [1]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Given data
information = {
    'age': ['<=30', '<=30', '31…40', '>40', '>40', '>40', '31…40', '<=30', '<=30', '>40', '<=30', '31…40', '31…40', '>40'],
    'income': ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low', 'medium', 'medium', 'medium', 'high', 'medium'],
    'student': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no'],
    'credit_rating': ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent'],
    'buys_computer': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
}

# Create a DataFrame
dataframe = pd.DataFrame(information)

In [5]:
#Encoding string data into numerical data
label = LabelEncoder()
encoded = dataframe.apply(label.fit_transform)
feature = encoded.drop('buys_computer', axis=1)
target = encoded['buys_computer']

In [7]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature, target, test_size=0.2, random_state=42)

# A1: prior probabilities for each class
priors_probs = y_train.value_counts(normalize=True).to_dict()

# A2: Calculate the class conditional densities 
conditional_densities = {}
for column in feature.columns:
    conditional_densities[column] = X_train.groupby(y_train)[column].value_counts(normalize=True).unstack().fillna(0).to_dict()

# A3: Test for independence between features
independence_feature = X_train.corr().to_dict()

# A4: Build a Naïve-Bayes classifier
Naive_model = GaussianNB()
Naive_model.fit(X_train, y_train)

# Make predictions on the test set
predictions = Naive_model.predict(X_test)

# Calculate accuracy of the predictions
accuracy = accuracy_score(y_test, predictions)



In [8]:
# Output the results
print("A1: Prior Probabilities:", priors_probs)
print("\nA2: Class Conditional Densities:", conditional_densities)
print("\nA3: Feature Independence:", independence_feature)
print("\nA4: Naïve-Bayes Classifier Test Set Accuracy:", accuracy)


A1: Prior Probabilities: {1: 0.6363636363636364, 0: 0.36363636363636365}

A2: Class Conditional Densities: {'age': {0: {0: 0.0, 1: 0.42857142857142855}, 1: {0: 0.5, 1: 0.2857142857142857}, 2: {0: 0.5, 1: 0.2857142857142857}}, 'income': {0: {0: 0.25, 1: 0.2857142857142857}, 1: {0: 0.25, 1: 0.42857142857142855}, 2: {0: 0.5, 1: 0.2857142857142857}}, 'student': {0: {0: 0.75, 1: 0.2857142857142857}, 1: {0: 0.25, 1: 0.7142857142857143}}, 'credit_rating': {0: {0: 0.75, 1: 0.2857142857142857}, 1: {0: 0.25, 1: 0.7142857142857143}}}

A3: Feature Independence: {'age': {'age': 1.0, 'income': 0.5657894736842105, 'student': -0.12565617248750865, 'credit_rating': -0.12565617248750865}, 'income': {'age': 0.5657894736842105, 'income': 1.0, 'student': -0.12565617248750863, 'credit_rating': -0.12565617248750865}, 'student': {'age': -0.12565617248750865, 'income': -0.12565617248750863, 'student': 1.0, 'credit_rating': -0.10000000000000005}, 'credit_rating': {'age': -0.12565617248750865, 'income': -0.12565

In [9]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import preprocessing as prepro 
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
#Insert your features and labels
samples = np.load('activation_maps_lines_image52_pixel.npy')
features = np.load('activation_maps_lines_image52_labelpixel.npy')

print("The shape of samples and featuresis:\n ",samples.shape,features.shape)
# Reshape your data  (pixels x features)
samples_num = samples.shape[0]   # Total number of pixels
features_num = samples.shape[1]  # Number of features for each pixel
X_reshaped = samples.reshape(samples_num,features_num)
y_reshaped = features.reshape(samples_num)  # Assuming y is your pixel-wise label mask
print(X_reshaped.shape,y_reshaped.shape)
dataframe = pd.DataFrame(samples,columns=np.arange(0,13))
dataframe['13']=features # Adding features coulmn into the samples dataset 
dataframe.info()

The shape of samples and featuresis:
  (16384, 13) (16384, 1)
(16384, 13) (16384,)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16384 entries, 0 to 16383
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       16384 non-null  float64
 1   1       16384 non-null  float64
 2   2       16384 non-null  float64
 3   3       16384 non-null  float64
 4   4       16384 non-null  float64
 5   5       16384 non-null  float64
 6   6       16384 non-null  float64
 7   7       16384 non-null  float64
 8   8       16384 non-null  float64
 9   9       16384 non-null  float64
 10  10      16384 non-null  float64
 11  11      16384 non-null  float64
 12  12      16384 non-null  float64
 13  13      16384 non-null  uint8  
dtypes: float64(13), uint8(1)
memory usage: 1.6 MB


In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y_reshaped, test_size=0.2, random_state=42)

# Build and train a Naive Bayes classifier
Naive_Bayes = GaussianNB()
Naive_Bayes.fit(X_train, y_train)

# Test the accuracy of the Naive Bayes classifier
accuracy = Naive_Bayes.score(X_test, y_test)
print(f"Naive Bayes Classifier Accuracy: {accuracy}")

# If needed, you can also predict on the test set and evaluate other metrics
predictions = Naive_Bayes.predict(X_test)

Naive Bayes Classifier Accuracy: 0.13976197741837046
