In [44]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import numpy as np
from sklearn.model_selection import train_test_split
from pathlib import Path

# Load dataset
data_file = 'data/geometric_features.csv'
df = pd.read_csv(data_file)

# Extract features and labels
features = ['area', 'perimeter', 'circularity', 'aspect_ratio', 'centroid_x', 'centroid_y', 'solidity', 'extent']
x = df[features].values
y = df['label'].values

# Encode labels to integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Normalize feature values
scaler = MinMaxScaler()
x = scaler.fit_transform(x)

# Define neural network structure
n_hidden = 5
l_rate = 0.3
n_epoch = 500

def initialize_network(n_inputs, n_hidden, n_outputs):
    network = [
        [{'weights': np.random.rand(n_inputs + 1)} for _ in range(n_hidden)],
        [{'weights': np.random.rand(n_hidden + 1)} for _ in range(n_outputs)]
    ]
    return network

def sigmoid(x, derivative=False):
    if derivative:
        return x * (1.0 - x)
    return 1.0 / (1.0 + np.exp(-x))

def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = neuron['weights'][-1] + np.dot(neuron['weights'][:-1], inputs)
            neuron['output'] = sigmoid(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = []
        if i != len(network) - 1:
            for j in range(len(layer)):
                error = sum([neuron['weights'][j] * neuron['delta'] for neuron in network[i + 1]])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * sigmoid(neuron['output'], derivative=True)

def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1] if i == 0 else [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] += l_rate * neuron['delta']

def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        for row in train:
            outputs = forward_propagate(network, row[:-1])
            expected = [0 for _ in range(n_outputs)]
            expected[int(row[-1])] = 1
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)

def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

# Train and evaluate for different splits
splits = {'9010': 0.9, '8020': 0.8, '7030': 0.7}
output_folder = Path('data')
output_folder.mkdir(exist_ok=True)

for split_name, train_ratio in splits.items():
    # Split data
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1-train_ratio, random_state=42, stratify=y)

    # Combine training data
    train_data = np.column_stack((x_train, y_train))

    # Initialize and train network
    n_inputs = x_train.shape[1]
    n_outputs = len(set(y))
    network = initialize_network(n_inputs, n_hidden, n_outputs)
    train_network(network, train_data, l_rate, n_epoch, n_outputs)

    # Predict on test data
    predictions = [predict(network, row) for row in x_test]
    accuracy = np.mean(predictions == y_test) * 100  # Calculate accuracy

    # Save predictions
    predicted_species = label_encoder.inverse_transform(predictions)
    df_test = pd.DataFrame(x_test, columns=features)
    df_test['Actual'] = label_encoder.inverse_transform(y_test)
    df_test['Predicted'] = predicted_species
    output_path = output_folder / f'predictions_{split_name}.csv'
    df_test.to_csv(output_path, index=False)

    print(f"Results for {split_name} saved to {output_path}")
    print(f"Accuracy for {split_name}: {accuracy:.2f}%")


Results for 9010 saved to data\predictions_9010.csv
Accuracy for 9010: 50.00%
Results for 8020 saved to data\predictions_8020.csv
Accuracy for 8020: 100.00%
Results for 7030 saved to data\predictions_7030.csv
Accuracy for 7030: 57.14%
