**Mutli-Modal Material Classifier For Material Selection**

In [32]:
# If getting errors here, make sure you have necessary libraries installed
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common.utils.utils import setup_outputdir
from autogluon.core.utils.loaders import load_pkl
from autogluon.core.utils.savers import save_pkl
import os.path
from sklearn.metrics import multilabel_confusion_matrix, f1_score, precision_score, recall_score, accuracy_score
from sklearn.metrics import r2_score, mean_squared_error,mean_absolute_error
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from autogluon.tabular import TabularPredictor
from sklearn.preprocessing import LabelEncoder
import shap as shap
import tkinter as tk
from tkinter import ttk

**Data Preprocessing**

Classify the data through parsing of keywords in each material description.

In [2]:
# Data Preparation
# Rename for your device as necessary
filename = "Data.csv"
df = pd.read_csv(filename)
display(df)

def parse_and_compare(input_string, keywords):
    # Normalize input string and tokenize
    words = set(input_string.lower().split())
    # Check for intersection with each keyword group
    for label, key_group in keywords.items():
        if words.intersection(key_group):
            return label
    return 'Unspecified'

# Keywords dictionary
keywords = {
    'Steel': {'steel', 'sae', 'en', 'din', 'bs', 'csn', 'nf', 'jis'},
    'Aluminum': {'aluminum'},
    'Iron': {'iron', 'grey', 'cast', 'nodular'},
    'Brass': {'brass', 'red', 'semi-red', 'yellow'},
    'Copper': {'copper', 'high-copper'},
    'Bronze': {'bronze', 'phosphor', 'tin', 'lead', 'silicon', 'manganese'},
    'Magnesium': {'magnesium'}
}

# Apply the function to the entire 'Material' column
df['Label'] = df['Material'].apply(lambda x: parse_and_compare(x, keywords))

df['Sy'] = df['Sy'].str.replace(' max', '').astype(int)

# Drop the unused columns
df.drop(['Std','ID', 'Heat treatment', 'Desc','A5','Bhn','pH','Desc','HV'], axis=1, inplace=True)

# Add the 'Use' column  ---> Broad material selection, not massively applicable
# df['Use'] = (
#     (df['Su'].between(336, 505)) &
#     (df['Sy'].between(251, 376)) &
#     (df['E'].between(165000, 248000)) &
#     (df['G'].between(63000, 94000)) &
#     (df['mu'].between(0.24, 0.36)) &
#     (df['Ro'].between(6200, 9400))
# ).map({True: 'True', False: 'False'})

# Insert the 'Use' column at the second position
# df.insert(7, 'Use', df.pop('Use'))

# Write the updated data to a new file
df.to_csv('BigMaterialGroups.csv', index=False)
display(df)

Unnamed: 0,Std,ID,Material,Heat treatment,Su,Sy,A5,Bhn,E,G,mu,Ro,pH,Desc,HV
0,ANSI,D8894772B88F495093C43AF905AB6373,Steel SAE 1015,as-rolled,421,314,39.0,126.0,207000,79000,0.30,7860,,,
1,ANSI,05982AC66F064F9EBC709E7A4164613A,Steel SAE 1015,normalized,424,324,37.0,121.0,207000,79000,0.30,7860,,,
2,ANSI,356D6E63FF9A49A3AB23BF66BAC85DC3,Steel SAE 1015,annealed,386,284,37.0,111.0,207000,79000,0.30,7860,,,
3,ANSI,1C758F8714AC4E0D9BD8D8AE1625AECD,Steel SAE 1020,as-rolled,448,331,36.0,143.0,207000,79000,0.30,7860,,,
4,ANSI,DCE10036FC1946FC8C9108D598D116AD,Steel SAE 1020,normalized,441,346,35.8,131.0,207000,79000,0.30,7860,550.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1547,JIS,512A80EC21EA416BA2725B38BA8096EF,Nodular cast iron,,600,370,,,169000,70000,0.20,7160,480.0,Nodular cast iron,210.0
1548,JIS,38526441BA8741CA979DBF870D0B8A9B,Nodular cast iron,,700,420,,,169000,70000,0.20,7160,560.0,Nodular cast iron,230.0
1549,JIS,CAC03D7EB1AA45E68EFF92A2EF4C3D9B,Nodular cast iron,,800,480,,,169000,70000,0.20,7160,600.0,Nodular cast iron,240.0
1550,JIS,45C82A36EC644F8BB6170A99ED819B62,Malleable cast iron,,400,180,4.0,,160000,64000,0.27,7160,300.0,Malleable cast iron,220.0


Unnamed: 0,Material,Su,Sy,E,G,mu,Ro,Label
0,Steel SAE 1015,421,314,207000,79000,0.30,7860,Steel
1,Steel SAE 1015,424,324,207000,79000,0.30,7860,Steel
2,Steel SAE 1015,386,284,207000,79000,0.30,7860,Steel
3,Steel SAE 1020,448,331,207000,79000,0.30,7860,Steel
4,Steel SAE 1020,441,346,207000,79000,0.30,7860,Steel
...,...,...,...,...,...,...,...,...
1547,Nodular cast iron,600,370,169000,70000,0.20,7160,Iron
1548,Nodular cast iron,700,420,169000,70000,0.20,7160,Iron
1549,Nodular cast iron,800,480,169000,70000,0.20,7160,Iron
1550,Malleable cast iron,400,180,160000,64000,0.27,7160,Iron


Split and Scale the data

In [23]:
def process_dataset(filename, target, onehot = False, scaling = True):
    """Apply one-hot encoding and scaling to the dataset as necessary"""

    df = pd.read_csv(filename)
    df = df.drop(['Material'], axis=1) # MAY REMOVE IN FURTURE ITERATIONS

    # Separate features and target
    X = df.drop(target, axis=1)
    Y = df[target]
    if onehot:
        X = OH_encode(X)

    # Filter out outliers (confirm if this work with onehot encoding)
    for col in df.columns:
        # Skip the target column
        if col == target:
            continue
        upper_limit = df[col].quantile(0.99)
        df = df[df[col] <= upper_limit]

    # Split the dataset into training and testing sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=42)


    if scaling:
        # Initialize StandardScaler
        scaler = StandardScaler()

        # Fit the scaler on the training data and transform both training and testing data
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Reconstruct the DataFrame from scaled data
        train_data = pd.DataFrame(X_train_scaled, columns=X_train.columns)
        train_data = pd.concat([train_data, Y_train.reset_index(drop=True)], axis=1)

        test_data = pd.DataFrame(X_test_scaled, columns=X_test.columns)
        test_data['target'] = Y_test.reset_index(drop=True)
    else: # Simply use the unscaled data
        train_data = pd.concat([X_train.reset_index(drop=True), Y_train.reset_index(drop=True)], axis=1)
        test_data = pd.concat([X_test.reset_index(drop=True), Y_test.reset_index(drop=True)], axis=1)
        test_data.rename(columns={target: 'target'}, inplace=True)
        
    return train_data, test_data

def OH_encode(data):
    data=data.copy()
    #One-hot encode the materials
    data.loc[:, "Material"]=pd.Categorical(data["Material"], categories=["Steel", "Aluminum", "Titanium"])
    mats_oh=pd.get_dummies(data["Material"], prefix="Material=", prefix_sep="")
    data.drop(["Material"], axis=1, inplace=True)
    data=pd.concat([mats_oh, data], axis=1)
    return data


**Performance Metrics Functions**

In [41]:
def printScores(y_val, class_pred, average='weighted'): #proba):
    f1 = f1_score(y_val, class_pred, average='weighted')
    precision = precision_score(y_val, class_pred, average='weighted')
    recall = recall_score(y_val, class_pred, average='weighted')
    accuracy = accuracy_score(y_val, class_pred)
    #auc = roc_auc_score(y_val, proba)
    print(multilabel_confusion_matrix(y_val, class_pred, labels=[0, 1]))
    print("F1: " + str(f1))
    print("Precision: " + str(precision))
    print("Recall: " + str(recall))
    print("Accuracy: " + str(accuracy))
    #print("AUC: " + str(auc))

def returnscores_xv(Yval, predictions):
    score = np.zeros(4)
    score[0] = accuracy_score(Yval, predictions)
    score[1] = recall_score(Yval, predictions, average='macro', zero_division=0)
    score[2] = precision_score(Yval, predictions, average='macro', zero_division=0)
    score[3] = f1_score(Yval, predictions, average='macro')
    return score

def printscores_xv(scores):
    print("Accuracy: " + str(100 * scores[0]) + "%")
    print("Recall: " + str(100 * scores[1]) + "%")
    print("Precision: " + str(100 * scores[2]) + "%")
    print("F1: " + str(100 * scores[3]) + "%")

**AutoML**

Set-up Multilabel Predictor (Autogluon only handles one by default)

In [38]:
class MultilabelPredictor():
    """ Tabular Predictor for predicting multiple columns in table.
        Creates multiple TabularPredictor objects which you can also use individually.
        You can access the TabularPredictor for a particular label via: `multilabel_predictor.get_predictor(label_i)`

        Parameters
        ----------
        labels : List[str]
            The ith element of this list is the column (i.e. `label`) predicted by the ith TabularPredictor stored in this object.
        path : str, default = None
            Path to directory where models and intermediate outputs should be saved.
            If unspecified, a time-stamped folder called "AutogluonModels/ag-[TIMESTAMP]" will be created in the working directory to store all models.
            Note: To call `fit()` twice and save all results of each fit, you must specify different `path` locations or don't specify `path` at all.
            Otherwise files from first `fit()` will be overwritten by second `fit()`.
            Caution: when predicting many labels, this directory may grow large as it needs to store many TabularPredictors.
        problem_types : List[str], default = None
            The ith element is the `problem_type` for the ith TabularPredictor stored in this object.
        eval_metrics : List[str], default = None
            The ith element is the `eval_metric` for the ith TabularPredictor stored in this object.
        consider_labels_correlation : bool, default = True
            Whether the predictions of multiple labels should account for label correlations or predict each label independently of the others.
            If True, the ordering of `labels` may affect resulting accuracy as each label is predicted conditional on the previous labels appearing earlier in this list (i.e. in an auto-regressive fashion).
            Set to False if during inference you may want to individually use just the ith TabularPredictor without predicting all the other labels.
        kwargs :
            Arguments passed into the initialization of each TabularPredictor.

    """

    multi_predictor_file = 'multilabel_predictor.pkl'

    def __init__(self, labels, path=None, problem_types=None, eval_metrics=None, consider_labels_correlation=True, **kwargs):
        if len(labels) < 2:
            raise ValueError("MultilabelPredictor is only intended for predicting MULTIPLE labels (columns), use TabularPredictor for predicting one label (column).")
        if (problem_types is not None) and (len(problem_types) != len(labels)):
            raise ValueError("If provided, `problem_types` must have same length as `labels`")
        if (eval_metrics is not None) and (len(eval_metrics) != len(labels)):
            raise ValueError("If provided, `eval_metrics` must have same length as `labels`")
        self.path = setup_outputdir(path, warn_if_exist=False)
        self.labels = labels
        self.consider_labels_correlation = consider_labels_correlation
        self.predictors = {}  # key = label, value = TabularPredictor or str path to the TabularPredictor for this label
        if eval_metrics is None:
            self.eval_metrics = {}
        else:
            self.eval_metrics = {labels[i] : eval_metrics[i] for i in range(len(labels))}
        problem_type = None
        eval_metric = None
        for i in range(len(labels)):
            label = labels[i]
            path_i = self.path + "Predictor_" + label
            if problem_types is not None:
                problem_type = problem_types[i]
            if eval_metrics is not None:
                eval_metric = eval_metrics[i]
            self.predictors[label] = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric, path=path_i, **kwargs)

    def fit(self, train_data, tuning_data=None, **kwargs):
        """ Fits a separate TabularPredictor to predict each of the labels.

            Parameters
            ----------
            train_data, tuning_data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                See documentation for `TabularPredictor.fit()`.
            kwargs :
                Arguments passed into the `fit()` call for each TabularPredictor.
        """
        if isinstance(train_data, str):
            train_data = TabularDataset(train_data)
        if tuning_data is not None and isinstance(tuning_data, str):
            tuning_data = TabularDataset(tuning_data)
        train_data_og = train_data.copy()
        if tuning_data is not None:
            tuning_data_og = tuning_data.copy()
        else:
            tuning_data_og = None
        save_metrics = len(self.eval_metrics) == 0
        for i in range(len(self.labels)):
            label = self.labels[i]
            predictor = self.get_predictor(label)
            if not self.consider_labels_correlation:
                labels_to_drop = [l for l in self.labels if l != label]
            else:
                labels_to_drop = [self.labels[j] for j in range(i+1, len(self.labels))]
            train_data = train_data_og.drop(labels_to_drop, axis=1)
            if tuning_data is not None:
                tuning_data = tuning_data_og.drop(labels_to_drop, axis=1)
            print(f"Fitting TabularPredictor for label: {label} ...")
            predictor.fit(train_data=train_data, tuning_data=tuning_data, **kwargs)
            self.predictors[label] = predictor.path
            if save_metrics:
                self.eval_metrics[label] = predictor.eval_metric
        self.save()

    def predict(self, data, **kwargs):
        """ Returns DataFrame with label columns containing predictions for each label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. If label columns are present in this data, they will be ignored. See documentation for `TabularPredictor.predict()`.
            kwargs :
                Arguments passed into the predict() call for each TabularPredictor.
        """
        return self._predict(data, as_proba=False, **kwargs)

    def predict_proba(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `predict_proba()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. See documentation for `TabularPredictor.predict()` and `TabularPredictor.predict_proba()`.
            kwargs :
                Arguments passed into the `predict_proba()` call for each TabularPredictor (also passed into a `predict()` call).
        """
        return self._predict(data, as_proba=True, **kwargs)

    def evaluate(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `evaluate()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to evalate predictions of all labels for, must contain all labels as columns. See documentation for `TabularPredictor.evaluate()`.
            kwargs :
                Arguments passed into the `evaluate()` call for each TabularPredictor (also passed into the `predict()` call).
        """
        data = self._get_data(data)
        eval_dict = {}
        for label in self.labels:
            print(f"Evaluating TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            eval_dict[label] = predictor.evaluate(data, **kwargs)
            if self.consider_labels_correlation:
                data[label] = predictor.predict(data, **kwargs)
        return eval_dict

    def save(self):
        """ Save MultilabelPredictor to disk. """
        for label in self.labels:
            if not isinstance(self.predictors[label], str):
                self.predictors[label] = self.predictors[label].path
        save_pkl.save(path=self.path+self.multi_predictor_file, object=self)
        print(f"MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('{self.path}')")

    @classmethod
    def load(cls, path):
        """ Load MultilabelPredictor from disk `path` previously specified when creating this MultilabelPredictor. """
        path = os.path.expanduser(path)
        if path[-1] != os.path.sep:
            path = path + os.path.sep
        return load_pkl.load(path=path+cls.multi_predictor_file)

    def get_predictor(self, label):
        """ Returns TabularPredictor which is used to predict this label. """
        predictor = self.predictors[label]
        if isinstance(predictor, str):
            return TabularPredictor.load(path=predictor)
        return predictor

    def _get_data(self, data):
        if isinstance(data, str):
            return TabularDataset(data)
        return data.copy()

    def _predict(self, data, as_proba=False, **kwargs):
        data = self._get_data(data)
        if as_proba:
            predproba_dict = {}
        for label in self.labels:
#             print(f"Predicting with TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            if as_proba:
                predproba_dict[label] = predictor.predict_proba(data, as_multiclass=True, **kwargs)
            data[label] = predictor.predict(data, **kwargs)
        if not as_proba:
            return data[self.labels]
        else:
            return predproba_dict

**Model Training**

In [45]:
# Get training and test data
target = 'Label'
train_data, test_data = process_dataset('BigMaterialGroups.csv', target, onehot = False, scaling = True) # Note that this process renames "Label" to "target"

# Separating features and target for traditional ML models
X_train = train_data.drop(target, axis=1)
Y_train = train_data[target]

# Train Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train, Y_train)

# Train Random Forest model
rf_model = RandomForestClassifier()
rf_model.fit(X_train, Y_train)

# Train FFNN model
class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='f1_score', **kwargs):
        super(F1Score, self).__init__(name=name, **kwargs)
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.round(tf.nn.softmax(y_pred))
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + tf.keras.backend.epsilon()))

    def reset_states(self):
        self.precision.reset_states()
        self.recall.reset_states()

label_encoder = LabelEncoder()
Y_train_encoded = label_encoder.fit_transform(Y_train)

ffnn_model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(8, activation='softmax') # 8 classes, adjust if number of labels changes
])
ffnn_model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
ffnn_model.fit(X_train, Y_train_encoded, epochs=50, batch_size=32)

# Train autoML model
autoML_model = TabularPredictor(label=target).fit(train_data)

lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


No path specified. Models will be saved in: "AutogluonModels\ag-20231201_032229\"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20231201_032229\"
AutoGluon Version:  0.8.2
Python Version:     3.10.2
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   32.11 GB / 510.96 GB (6.3%)
Train Data Rows:    1241
Train Data Columns: 6
Label Column: Label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	8 unique label values:  ['Steel', 'Copper', 'Aluminum', 'Iron', 'Brass', 'Bronze', 'Magnesium', 'Unspecified']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Train Data Class Count: 8
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Ava

**Evaluate Models**

In [46]:
# True labels
y_test = test_data['target']

# Data without labels
test_data_nolab = test_data.drop(labels=['target'], axis=1)
#test_data_nolab_encoded = label_encoder.transform(test_data_nolab)

# Logistic Regression Evaluation
lr_y_pred = lr_model.predict(test_data_nolab)
print("Logistic Regression Evaluation:")
printScores(y_test, lr_y_pred, average='weighted')

# Random Forest Evaluation
rf_y_pred = rf_model.predict(test_data_nolab)
print("\nRandom Forest Evaluation:")
printScores(y_test, rf_y_pred, average='weighted')

# FFNN Evaluation
# Convert test data to array for TensorFlow model
test_data_array = test_data_nolab.to_numpy()
# Predict using FFNN model
ffnn_y_pred = ffnn_model.predict(test_data_array)
# Convert probabilities to class labels
ffnn_y_pred = tf.argmax(ffnn_y_pred, axis=1).numpy()
ffnn_y_pred = label_encoder.inverse_transform(ffnn_y_pred)
print("\nFFNN Evaluation:")
printScores(y_test, ffnn_y_pred, average='weighted')

# AutoGluon Evaluation
y_pred_autogluon = autoML_model.predict(test_data_nolab)
print("\nAutoGluon Evaluation:")
printScores(y_test, y_pred_autogluon, average='weighted')

Logistic Regression Evaluation:
[[[311   0]
  [  0   0]]

 [[311   0]
  [  0   0]]]
F1: 0.8863534395902106
Precision: 0.891667121015348
Recall: 0.9035369774919614
Accuracy: 0.9035369774919614

Random Forest Evaluation:
[[[311   0]
  [  0   0]]

 [[311   0]
  [  0   0]]]
F1: 0.9809972016495088
Precision: 0.9838768948093707
Recall: 0.9807073954983923
Accuracy: 0.9807073954983923
 1/10 [==>...........................] - ETA: 0s

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



FFNN Evaluation:
[[[311   0]
  [  0   0]]

 [[311   0]
  [  0   0]]]
F1: 0.9844925072822375
Precision: 0.9863782519731072
Recall: 0.9839228295819936
Accuracy: 0.9839228295819936

AutoGluon Evaluation:
[[[311   0]
  [  0   0]]

 [[311   0]
  [  0   0]]]
F1: 0.9799001481519675
Precision: 0.9770473972935442
Recall: 0.9839228295819936
Accuracy: 0.9839228295819936


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison


**Model Interpretation**

In [None]:


shap.intitjs()


**User Interface**

In [8]:

import random

# HELPER FUNCTIONS
# Function to collect and process inputs, then display outputs
def submit():
    inputs = {}
    for var, entry in entries.items():
        inputs[var] = entry.get()

    # Simulate model prediction
    results = plug_into_model(inputs)

    # Display the results
    display_results(results)

# Mock function to simulate ML model predictions
def plug_into_model(user_inputs, model):
    # This is just a placeholder. Replace with your actual ML model logic.
    materials = ["Steel", "Aluminum", "Titanium", "Copper"]
    properties = ["High Strength", "Lightweight", "Corrosion Resistant", "High Conductivity"]
    results = []
    for _ in range(random.randint(1, 5)):  # Generate a random number of results
        material = random.choice(materials)
        property = random.choice(properties)
        results.append(f"{material}: {property}")
    return results

# Function to display the results in a new window
def display_results(results):
    result_window = tk.Toplevel(root)
    result_window.title("AutoML Materials Classifier Results")

    ttk.Label(result_window, text="Predicted Material Properties:", font=("Arial", 14)).pack(pady=10)

    for result in results:
        ttk.Label(result_window, text=result).pack()

# GUI CREATION
# Create the main window
root = tk.Tk()
root.title("Materials Property ML Classifier")

# Define the variables you want inputs for
# Define input variables
kaggle_variables = ["Young's Modulus [MPa]", "Shear Modulus [GPa]", 
                    "Yield Strength [MPa]", "Ultimate Tensile Strength [MPa]", 
                    "Poisson's Ration [/]", "Density [kg/m^3]"]

# Create a dictionary to hold the entry widgets
entries = {}

# Create and pack the widgets for inputs
for var in kaggle_variables:
    frame = ttk.Frame(root)
    frame.pack(fill='x', padx=5, pady=5)

    label = ttk.Label(frame, text=var)
    label.pack(side='left')

    entry = ttk.Entry(frame)
    entry.pack(side='right', expand=True, fill='x')
    entries[var] = entry

# Submit button
submit_button = ttk.Button(root, text="Submit", command=submit)
submit_button.pack(pady=10)

# Start the GUI event loop
root.mainloop()
