In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset


In [None]:
# --- Load -------------------------------------------------------------------
file_path = 'almonds/Almond.csv'
almond_data = pd.read_csv(file_path)

# Schema overview and first rows. info() prints its own report, so both calls
# are plain statements instead of being packed into a throwaway tuple.
almond_data.info()
print(almond_data.head())

# 'Unnamed: 0' is the name pandas gives to a header-less CSV column
# (presumably a stored row index - verify against the raw file).
# errors='ignore' keeps the cell runnable when the column is absent.
almond_data_cleaned = almond_data.drop(columns=['Unnamed: 0'], errors='ignore')

# --- Split into features / label ---------------------------------------------
features = almond_data_cleaned.drop(columns=['Type'])
target = almond_data_cleaned['Type']

# --- Scale features to [0, 1] ------------------------------------------------
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed further down the notebook; test-set statistics
# therefore influence the scaling. Confirm this is acceptable.
scaler = MinMaxScaler()
features_normalized = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

# Missing measurements are replaced by the sentinel -1, which lies outside the
# scaled [0, 1] range and so stays distinguishable from real values.
features_normalized_filled = features_normalized.fillna(-1)

# --- One-hot encode the label -------------------------------------------------
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword; try the current spelling first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
target_encoded_filled = pd.DataFrame(
    encoder.fit_transform(target.values.reshape(-1, 1)),
    columns=encoder.categories_[0],
)

# Both frames carry a fresh default index, so the column-wise concat lines up
# row by row.
preprocessed_data_filled = pd.concat([features_normalized_filled, target_encoded_filled], axis=1)

# --- Sanity check -------------------------------------------------------------
# Every cell must be either inside [0, 1] or exactly the -1 sentinel. The mask
# is computed in one vectorised pass; only offending cells are visited in
# Python, in the same row-then-column order as a nested loop would report them.
in_unit_range = preprocessed_data_filled.ge(0) & preprocessed_data_filled.le(1)
is_sentinel = preprocessed_data_filled.eq(-1)
invalid_cells = ~(in_unit_range | is_sentinel)

for index in invalid_cells.index[invalid_cells.any(axis=1)]:
    row_flags = invalid_cells.loc[index]
    for col in row_flags.index[row_flags.to_numpy()]:
        value = preprocessed_data_filled.at[index, col]
        print(f"Invalid value found: {value} at row {index}, column '{col}'")

In [4]:

# Persist the preprocessed table to disk. index=False keeps the row index out
# of the file, so no extra index column is written.
output_file_path = 'almonds/Almond_Prepped.csv'
preprocessed_data_filled.to_csv(path_or_buf=output_file_path, index=False)

In [None]:
# Reload the preprocessed table from disk and print ten randomly chosen rows
# as a quick visual check (the sample is unseeded, so it differs per run).
Prepped_file_path = 'almonds/Almond_Prepped.csv'
preprocessed_data_filled = pd.read_csv(filepath_or_buffer=Prepped_file_path)
print(preprocessed_data_filled.sample(n=10))


In [None]:
# Separate the three one-hot label columns from the remaining feature columns.
y = preprocessed_data_filled.loc[:, ['MAMRA', 'REGULAR', 'SANORA']].values
X = preprocessed_data_filled.drop(columns=['MAMRA', 'REGULAR', 'SANORA']).values

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Convert every split to a float32 tensor in one pass.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)

# Pair features with labels so each dataset item is an (x, y) tuple.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batches of 32; only the training loader shuffles between epochs.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [6]:
class NN(nn.Module):
    """Fully connected feed-forward classifier with a configurable hidden stack.

    Args:
        input_size: Number of input features per sample.
        hidden_layers: Non-empty sequence of hidden-layer widths. The first
            entry is the width produced by the input layer; each consecutive
            pair of entries defines one additional hidden ``nn.Linear``.
        output_size: Number of output classes.
        actFunc: ``'Sigmoid'`` selects ``nn.Sigmoid``; any other value falls
            back to ``nn.ReLU``.
    """

    def __init__(self, input_size, hidden_layers, output_size, actFunc):
        super().__init__()
        self.inputLayer = nn.Linear(input_size, hidden_layers[0])
        # The hidden layers must live in an nn.ModuleList: a plain Python list
        # hides them from nn.Module, so their weights would be missing from
        # parameters() / state_dict(), would not follow .to(device), and would
        # never be updated by an optimizer built from model.parameters().
        self.hiddenLayers = nn.ModuleList(
            nn.Linear(in_width, out_width)
            for in_width, out_width in zip(hidden_layers[:-1], hidden_layers[1:])
        )
        self.outputLayer = nn.Linear(hidden_layers[-1], output_size)
        # ReLU is the default activation; only the exact string 'Sigmoid'
        # switches it.
        self.actFunc = nn.Sigmoid() if actFunc == 'Sigmoid' else nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities for a batch ``x``.

        The softmax is taken over dimension 1, so ``x`` is expected to be a
        2-D ``(batch, input_size)`` tensor and every output row sums to 1.

        NOTE(review): if this model is trained with ``nn.CrossEntropyLoss``,
        which expects raw logits, the softmax here would effectively be
        applied twice - confirm against the training loop.
        """
        x = self.actFunc(self.inputLayer(x))
        for layer in self.hiddenLayers:
            x = self.actFunc(layer(x))
        x = self.outputLayer(x)
        return self.softmax(x)