In [1]:
import numpy as np

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated in ``fit``.
    ``predict`` returns, for each sample, the class with the largest
    log-posterior (log prior + sum of per-feature log-likelihoods).

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance. Without it, a feature that is constant
        within a class has variance 0, the density becomes 0/0 = NaN and
        ``argmax`` silently returns the first class.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training data (ndarray or DataFrame).
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` disagree on the number
            of samples.
        """
        # Work on plain arrays so boolean masking does not depend on a
        # pandas index and DataFrame input behaves like ndarray input.
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        if n_samples == 0 or n_features == 0:
            raise ValueError("X must contain at least one sample and one feature")
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # calculate mean, var, and prior for each class
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self._classes):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0)
            self._priors[idx] = X_c.shape[0] / float(n_samples)

        # Variance floor, scaled by the data so it is unit-independent.
        epsilon = self.var_smoothing * X.var(axis=0).max()
        if not epsilon > 0:
            # Every feature is constant over the whole training set; fall
            # back to the raw smoothing value so variances stay positive.
            epsilon = self.var_smoothing
        self._var += epsilon

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        # Iterating a DataFrame yields column names, not rows, so convert
        # to an array before looping over samples.
        X = np.asarray(X, dtype=np.float64)
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        """Return the class with the highest log-posterior for one sample."""
        posteriors = []

        # calculate the log-posterior for each class
        for idx in range(len(self._classes)):
            prior = np.log(self._priors[idx])
            # Summing log-densities directly avoids exp() underflowing to
            # 0 (and log(0) = -inf for every class) on far-away samples.
            log_likelihood = np.sum(self._log_pdf(idx, x))
            posteriors.append(prior + log_likelihood)

        # return class with the highest posterior
        return self._classes[np.argmax(posteriors)]

    def _log_pdf(self, class_idx, x):
        """Per-feature log of the Gaussian density of ``x`` under a class."""
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        return -0.5 * np.log(2.0 * np.pi * var) - ((x - mean) ** 2) / (2.0 * var)

    def _pdf(self, class_idx, x):
        """Per-feature Gaussian density of ``x`` under a class.

        Kept for callers that want the raw density; prediction uses
        ``_log_pdf`` for numerical stability.
        """
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

In [2]:
import pandas as pd
# Read the tilapia growth dataset from the working directory and preview it.
df = pd.read_csv(filepath_or_buffer='tilapia_growth_final.csv')
df

Unnamed: 0,fish_size_category,protein_level_percent,feed_type,feeding_rate_percent,feeding_frequency_per_day,water_temperature_c,feeding_method,pellet_size_mm,pellet_hardness,natural_feeding_habit,stock_density_fish_per_m3,dissolved_oxygen_mgL,growth_efficiency
0,Juvenile_0.5_10g,39.8,Crumbles,6.6,4,28.7,Hand_Feeding,1.1,Soft,Plankton_Feeder,48.8,7.2,Good
1,Juvenile_0.5_10g,36.5,Crumbles,7.9,4,30.5,Hand_Feeding,0.9,Soft,Plankton_Feeder,11.9,7.9,Poor
2,Juvenile_10_35g,31.9,Juvenile_Pellet,13.6,3,26.7,Feeding_Bag,2.0,Soft,Omnivorous,48.6,7.0,Poor
3,Adult_35g_plus,27.2,Adult_Pellet,2.1,3,32.8,Hand_Feeding,3.2,Soft,Omnivorous,17.3,6.8,Good
4,Juvenile_10_35g,30.2,Juvenile_Pellet,9.0,4,28.4,Hand_Feeding,1.9,Soft,Omnivorous,17.8,3.2,Good
...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,Adult_35g_plus,26.7,Adult_Pellet,2.8,3,24.8,Feeding_Bag,2.7,Soft,Omnivorous,33.2,6.2,Good
291,Adult_35g_plus,26.4,Adult_Pellet,2.7,4,33.0,Feeding_Bag,3.0,Medium,Omnivorous,44.7,6.6,Poor
292,Juvenile_10_35g,30.7,Crumbles,14.6,2,31.9,Hand_Feeding,1.0,Soft,Omnivorous,47.6,6.2,Poor
293,Juvenile_0.5_10g,38.5,Mash_Powder,11.0,3,22.6,Hand_Feeding,0.1,Soft,Plankton_Feeder,41.1,7.1,Poor


In [3]:
# Integer codes for every categorical feature column, keyed by column name.
mappings = {
    'fish_size_category': {
        'Fry_Larvae': 0,
        'Juvenile_0.5_10g': 1,
        'Juvenile_10_35g': 2,
        'Adult_35g_plus': 3,
        'Broodstock': 4
    },
    'feed_type': {
        'Mash_Powder': 0,
        'Crumbles': 1,
        'Juvenile_Pellet': 2,
        'Adult_Pellet': 3
    },
    'feeding_method': {
        'Hand_Feeding': 0,
        'Feeding_Bag': 1
    },
    'pellet_hardness': {
        'Soft': 0,
        'Medium': 1,
        'Hard': 2
    },
    'natural_feeding_habit': {
        'Plankton_Feeder': 0,
        'Omnivorous': 1
    },
}

# Replace each categorical column of df with its integer codes.
for column, mapping in mappings.items():
    if column not in df.columns:
        continue

    present = df[column].dropna()

    # Re-running this cell must be a no-op: once a column holds only codes,
    # mapping it again would turn every value into NaN.
    if present.isin(list(mapping.values())).all():
        continue

    # Series.map yields NaN for values missing from the dict, so an
    # unexpected or misspelled category would silently become NaN.
    unknown = sorted(set(present.unique()) - set(mapping), key=str)
    if unknown:
        raise ValueError(
            "Column {!r} contains categories with no code: {}".format(column, unknown)
        )

    df[column] = df[column].map(mapping)

In [4]:
# Preview the first five rows after encoding the categorical columns.
df.head(n=5)

Unnamed: 0,fish_size_category,protein_level_percent,feed_type,feeding_rate_percent,feeding_frequency_per_day,water_temperature_c,feeding_method,pellet_size_mm,pellet_hardness,natural_feeding_habit,stock_density_fish_per_m3,dissolved_oxygen_mgL,growth_efficiency
0,1,39.8,1,6.6,4,28.7,0,1.1,0,0,48.8,7.2,Good
1,1,36.5,1,7.9,4,30.5,0,0.9,0,0,11.9,7.9,Poor
2,2,31.9,2,13.6,3,26.7,1,2.0,0,1,48.6,7.0,Poor
3,3,27.2,3,2.1,3,32.8,0,3.2,0,1,17.3,6.8,Good
4,2,30.2,2,9.0,4,28.4,0,1.9,0,1,17.8,3.2,Good


In [5]:
# Encode the label as a binary target: 'Poor' -> 0, 'Good' -> 1.
df['target'] = df['growth_efficiency'].map({'Poor': 0, 'Good': 1})
# Series.map yields NaN for any label missing from the dict; fail loudly
# here instead of training on NaN targets.
assert df['target'].notna().all(), \
    "growth_efficiency contains labels other than 'Poor'/'Good'"
df.head()

Unnamed: 0,fish_size_category,protein_level_percent,feed_type,feeding_rate_percent,feeding_frequency_per_day,water_temperature_c,feeding_method,pellet_size_mm,pellet_hardness,natural_feeding_habit,stock_density_fish_per_m3,dissolved_oxygen_mgL,growth_efficiency,target
0,1,39.8,1,6.6,4,28.7,0,1.1,0,0,48.8,7.2,Good,1
1,1,36.5,1,7.9,4,30.5,0,0.9,0,0,11.9,7.9,Poor,0
2,2,31.9,2,13.6,3,26.7,1,2.0,0,1,48.6,7.0,Poor,0
3,3,27.2,3,2.1,3,32.8,0,3.2,0,1,17.3,6.8,Good,1
4,2,30.2,2,9.0,4,28.4,0,1.9,0,1,17.8,3.2,Good,1


In [6]:
# Features: every column except the raw label and its numeric encoding.
X = df.drop(columns=['growth_efficiency', 'target'])
# Target: the 0/1 encoded label.
y = df['target']

In [7]:
# Show the first feature row to check which columns are used.
X.head(n=1)

Unnamed: 0,fish_size_category,protein_level_percent,feed_type,feeding_rate_percent,feeding_frequency_per_day,water_temperature_c,feeding_method,pellet_size_mm,pellet_hardness,natural_feeding_habit,stock_density_fish_per_m3,dissolved_oxygen_mgL
0,1,39.8,1,6.6,4,28.7,0,1.1,0,0,48.8,7.2


In [8]:
# Show the first target value to check the label encoding.
y.head(n=1)

0    1
Name: target, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [10]:
# Testing: evaluation metric, feature scaling, model training and prediction
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth labels (list, ndarray or pandas Series).
    y_pred : array-like of shape (n_samples,)
        Predicted labels, in the same order as ``y_true``.

    Returns
    -------
    numpy.float64
        Share of positions where ``y_true`` and ``y_pred`` agree.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    # Convert first: comparing two plain lists with == gives a single bool,
    # and comparing two Series aligns on index labels rather than position.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Without this check, mismatched shapes would broadcast silently.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                y_true.shape, y_pred.shape
            )
        )
    if y_true.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")

    return np.mean(y_true == y_pred)

from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the Naive Bayes model and predict labels for the held-out split.
nb = NaiveBayes()
nb.fit(X_train_scaled, y_train)
predictions = nb.predict(X_test_scaled)

In [11]:
# Report the share of test samples that were classified correctly.
test_accuracy = accuracy(y_test, predictions)
print("Naive Bayes classification accuracy", test_accuracy)

Naive Bayes classification accuracy 0.7288135593220338
