In [31]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset


In [None]:
# Load the raw almond dataset and print a quick overview of it.
file_path = 'almonds/Almond.csv'
almond_data = pd.read_csv(file_path)

almond_data.info()
print(almond_data.head())

# 'Unnamed: 0' is removed before modelling
# (presumably a row index written by an earlier to_csv call - TODO confirm).
almond_data_cleaned = almond_data.drop(columns=['Unnamed: 0'])

# Every column except 'Type' is an input feature; 'Type' is the class label.
features = almond_data_cleaned.drop(columns=['Type'])
target = almond_data_cleaned['Type']

# Scale each feature to [0, 1]. MinMaxScaler ignores NaN when fitting and keeps
# it when transforming, so missing values survive this step and are replaced by
# the sentinel -1 (outside the scaled range) right after.
# NOTE(review): the scaler is fitted on the full dataset before any train/test
# split, so test-set statistics leak into the scaling - consider fitting on the
# training portion only.
scaler = MinMaxScaler()
features_normalized = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

features_normalized_filled = features_normalized.fillna(-1)

# One-hot encode the label. The `sparse=` keyword was renamed to
# `sparse_output=` in scikit-learn 1.2 and removed in 1.4, so take the default
# sparse result and densify it explicitly; this works on every version.
encoder = OneHotEncoder()
target_encoded_filled = pd.DataFrame(
    encoder.fit_transform(target.values.reshape(-1, 1)).toarray(),
    columns=encoder.categories_[0],
)

preprocessed_data_filled = pd.concat([features_normalized_filled, target_encoded_filled], axis=1)

# Sanity check: every cell must lie in [0, 1] or be the -1 sentinel.
# The mask is computed in one vectorized pass; nonzero() yields positions in
# row-major order, so offending cells are reported row by row, left to right.
in_unit_range = (preprocessed_data_filled >= 0) & (preprocessed_data_filled <= 1)
is_sentinel = preprocessed_data_filled == -1
bad_rows, bad_cols = (~(in_unit_range | is_sentinel)).to_numpy().nonzero()
for row_pos, col_pos in zip(bad_rows, bad_cols):
    value = preprocessed_data_filled.iat[row_pos, col_pos]
    index = preprocessed_data_filled.index[row_pos]
    col = preprocessed_data_filled.columns[col_pos]
    print(f"Invalid value found: {value} at row {index}, column '{col}'")

In [33]:

# Persist the preprocessed frame (without the index column) so later cells can
# start from the CSV instead of re-running the preprocessing above.
output_file_path = 'almonds/Almond_Prepped.csv'
preprocessed_data_filled.to_csv(path_or_buf=output_file_path, index=False)

In [None]:
# Reload the preprocessed dataset from disk; this rebinds
# `preprocessed_data_filled` to the frame parsed from the CSV.
Prepped_file_path = 'almonds/Almond_Prepped.csv'
preprocessed_data_filled = pd.read_csv(filepath_or_buffer=Prepped_file_path)

# Print ten randomly drawn rows as a spot check (unseeded, so the rows shown
# differ from run to run).
print(preprocessed_data_filled.sample(n=10))


In [35]:
# The three one-hot label columns; every other column is a model input.
label_columns = ['MAMRA', 'REGULAR', 'SANORA']
X = preprocessed_data_filled.drop(columns=label_columns).values
Y = preprocessed_data_filled[label_columns].values

# Hold out 20% for testing, then take 37.5% of the remaining 80% (30% of the
# whole) for evaluation, leaving 50% of the data for training.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_eval, Y_train, Y_eval = train_test_split(X_train, Y_train, test_size=0.375, random_state=42)

# Convert each NumPy split into a float32 tensor.
X_train_tensor, Y_train_tensor = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, Y_train)
)
X_eval_tensor, Y_eval_tensor = (
    torch.tensor(split, dtype=torch.float32) for split in (X_eval, Y_eval)
)
X_test_tensor, Y_test_tensor = (
    torch.tensor(split, dtype=torch.float32) for split in (X_test, Y_test)
)

# Pair inputs with labels for each split.
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
eval_dataset = TensorDataset(X_eval_tensor, Y_eval_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

# Mini-batches of 32; only the training loader reshuffles between epochs.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
eval_loader = DataLoader(eval_dataset, shuffle=False, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [36]:
class NN(nn.Module):
    """Fully connected classifier with a configurable stack of hidden layers.

    Args:
        input_size: Number of input features.
        hidden_layers: Non-empty sequence of layer widths. The first entry is
            the width of the input layer's output; each consecutive pair
            defines one additional hidden ``nn.Linear``.
        output_size: Number of output classes.
        actFunc: Activation name: ``'Sigmoid'`` or ``'TanH'``. Any other value
            (including the default ``'ReLU'``) selects ReLU.

    ``forward`` returns softmax probabilities, one row per sample.

    NOTE(review): ``nn.CrossEntropyLoss`` expects raw logits and applies
    log-softmax itself, so training this model with that loss normalizes the
    scores twice. The softmax is kept here to preserve the existing output
    contract.
    """

    def __init__(self, input_size, hidden_layers, output_size, actFunc = 'ReLU'):
        super(NN, self).__init__()
        self.inputLayer = nn.Linear(input_size, hidden_layers[0])
        # nn.ModuleList (not a plain list) registers the hidden layers as
        # submodules, so their weights appear in parameters()/state_dict() and
        # are actually updated by the optimizer and moved by .to(device).
        self.hiddenLayers = nn.ModuleList(
            nn.Linear(hidden_layers[i], hidden_layers[i + 1])
            for i in range(len(hidden_layers) - 1)
        )
        self.outputLayer = nn.Linear(hidden_layers[-1], output_size)

        # Unknown activation names fall back to ReLU.
        if actFunc == 'Sigmoid':
            self.actFunc = nn.Sigmoid()
        elif actFunc == 'TanH':
            self.actFunc = nn.Tanh()
        else:
            self.actFunc = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a 2-D batch ``x`` to class probabilities (softmax over dim 1)."""
        x = self.actFunc(self.inputLayer(x))
        for layer in self.hiddenLayers:
            x = self.actFunc(layer(x))
        x = self.outputLayer(x)
        x = self.softmax(x)
        return x

In [37]:

def train_model(optimizer, model, loader, num_epochs, verbose = 1):
    """Train ``model`` with cross-entropy loss for ``num_epochs`` passes over ``loader``.

    Args:
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: Module mapping a batch of inputs to per-class scores.
        loader: Sized iterable of ``(inputs, labels)`` batches; ``labels`` are
            one-hot rows and are converted to class indices via argmax.
        num_epochs: Number of passes over ``loader``.
        verbose: When equal to 1, print the mean batch loss after every epoch.

    Returns:
        List with the mean batch loss of each epoch (``nan`` for an epoch in
        which ``loader`` produced no batches).

    NOTE(review): ``nn.CrossEntropyLoss`` applies log-softmax internally and
    expects raw logits; a model whose forward pass already ends in a softmax
    is trained on doubly normalized scores.
    """
    model.train()
    # The criterion is stateless, so build it once instead of once per batch.
    criterion = nn.CrossEntropyLoss()
    num_batches = len(loader)
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in loader:
            outputs = model(inputs)
            # One-hot rows -> class indices expected by CrossEntropyLoss.
            loss = criterion(outputs, labels.argmax(dim=1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Guard against an empty loader instead of dividing by zero.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(mean_loss)
        if verbose == 1:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')
    return epoch_losses

def test_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            labels_class = torch.max(labels, 1)[1]
            total += labels.size(0)
            correct += (predicted == labels_class).sum().item()
    accuracy = 100 * (correct / total)
    print(f'Test Accuracy: {accuracy:.2f}%')

In [None]:
def simulate(hiddenLayers, activationFunction, train_loader, eval_loader, epochs = 20,
             input_size = 12, output_size = 3, learning_rate = 0.001):
    """Build, train and evaluate one ``NN`` configuration.

    Layer widths halve towards the output and always end at 4, e.g.
    ``hiddenLayers=3`` gives ``[16, 8, 4]``. A value below 1 is treated like 1
    (a single width-4 layer).

    Args:
        hiddenLayers: Number of entries in the width list passed to ``NN``.
        activationFunction: Activation name forwarded to ``NN``.
        train_loader: Batches used for training.
        eval_loader: Batches used for the accuracy report after training.
        epochs: Number of training epochs.
        input_size: Number of input features (default 12).
        output_size: Number of classes (default 3).
        learning_rate: Adam learning rate (default 0.001).

    Returns:
        The trained model.
    """
    # Widest layer first: 4 * 2**(k-1), ..., 8, 4.
    depth = max(hiddenLayers, 1)
    hidden_size = [4 * 2 ** power for power in reversed(range(depth))]

    model = NN(input_size, hidden_size, output_size, activationFunction)
    optimizer_adam = optim.Adam(model.parameters(), lr=learning_rate)

    train_model(optimizer_adam, model, train_loader, epochs, verbose=0)
    test_model(model, eval_loader)

    return model
