In [2]:
#Imports
import itertools
import json
from pathlib import Path

import pandas as pd
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [17]:
#Read in Data
# Both wine-quality tables are semicolon-separated CSV files.
red_df, white_df = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in ('../Resources/winequality-red.csv',
                     '../Resources/winequality-white.csv')
)

In [4]:
def _fit_and_score(wine_df, features_to_test, epochs):
    """Train a small dense classifier on one wine table and score it on held-out rows.

    Args:
        wine_df: DataFrame holding a 'quality' column plus every column named
            in ``features_to_test``.
        features_to_test: List of column names used as model inputs.
        epochs: Number of training epochs passed to ``model.fit``.

    Returns:
        Tuple ``(loss, accuracy)`` as reported by ``model.evaluate`` on the
        test split, converted to plain Python floats.
    """
    #Splitting
    # One-hot encode the quality label: one column per distinct quality value.
    # The dtype is explicit so the target is numeric whatever pandas' default
    # dummy dtype is.
    target = pd.get_dummies(wine_df['quality'], dtype='float32')
    features = wine_df[features_to_test]
    X_train, X_test, y_train, y_test = train_test_split(features, target, random_state=42)

    #Scaling
    # The scaler is fitted on the training rows only and then applied to both splits.
    X_scaler = StandardScaler().fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    #Create and Evaluate Model
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=25, activation='selu', input_dim=features.shape[1]))
    model.add(tf.keras.layers.Dense(units=20, activation='softplus'))
    model.add(tf.keras.layers.Dense(units=10, activation='softsign'))
    # Output width equals the number of one-hot target columns.
    model.add(tf.keras.layers.Dense(target.shape[1], activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train_scaled, y_train, epochs=epochs)
    loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)

    # Plain floats keep the result JSON-serializable.
    return float(loss), float(accuracy)


def TestModel(features_to_test, red_epochs=30, white_epochs=25):
    """Train and evaluate one classifier per wine colour on the given features.

    The same pipeline (split, scale, fit, evaluate) is run independently on
    the module-level ``red_df`` and ``white_df`` tables.

    Args:
        features_to_test: List of column names to use as model inputs.
        red_epochs: Training epochs for the red-wine model (default 30).
        white_epochs: Training epochs for the white-wine model (default 25).

    Returns:
        Dict with keys 'Red_Loss', 'Red_Acc', 'White_Loss' and 'White_Acc'
        holding the test-split loss and accuracy of each model.
    """
    # This function is called many times in a loop and builds two new models
    # per call; resetting Keras' global state keeps old models from piling up.
    tf.keras.backend.clear_session()

    model_red_loss, model_red_accuracy = _fit_and_score(red_df, features_to_test, red_epochs)
    model_white_loss, model_white_accuracy = _fit_and_score(white_df, features_to_test, white_epochs)

    return({'Red_Loss':model_red_loss, 'Red_Acc':model_red_accuracy, 
            'White_Loss':model_white_loss, 'White_Acc':model_white_accuracy})
    

In [5]:
# Accumulators filled by the sweep cells below and written to JSON at the end.
feature_results, missing_feature_results = [], []

In [None]:
#Test Single Features
# Train on each input column by itself and record the scores per column.
cols = red_df.columns.drop('quality')
results = [{'feature':name, 'result':TestModel([name])} for name in cols]
feature_results.append({'single_features':results})

In [8]:
#Test Double Features
# Each unordered pair of distinct features is trained exactly once.
# A full nested loop would also train on [a, a] (a duplicated column that
# repeats the single-feature test) and on both (a, b) and (b, a).
cols = red_df.drop(columns='quality').columns
results = []
for feature_a, feature_b in itertools.combinations(cols, 2):
    result = TestModel([feature_a, feature_b])
    results.append({'features': [feature_a, feature_b], 'result':result})
feature_results.append({'double_features':results})

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
13/13 - 0s - loss: 1.1556 - accuracy: 0.4300 - 194ms/epoch - 15ms/step
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
39/39 - 0s - loss: 1.2867 - accuracy: 0.4473 - 359ms/epoch - 9ms/step
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/3

In [27]:
#Test Single Missing Features
# Leave one input column out at a time and train on all the others.
cols = red_df.columns.drop('quality')
results = []
for omitted in cols:
    kept = [name for name in cols if name != omitted]
    results.append({'feature':omitted, 'result':TestModel(kept)})
missing_feature_results.append({'single_features':results})

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
13/13 - 0s - loss: 0.9147 - accuracy: 0.5950 - 225ms/epoch - 17ms/step
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
39/39 - 0s - loss: 1.0510 - accuracy: 0.5404 - 223ms/epoch - 6ms/step
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/3

In [30]:
#Test Double Missing Features
# Leave out each unordered pair of distinct features exactly once.
# Visiting both (a, b) and (b, a) would train on the same remaining
# feature set twice.
cols = red_df.drop(columns='quality').columns
results = []
for feature_a, feature_b in itertools.combinations(cols, 2):
    feature_list = [name for name in cols if name not in (feature_a, feature_b)]
    result = TestModel(feature_list)
    results.append({'features': [feature_a, feature_b], 'result':result})
missing_feature_results.append({'double_features':results})

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
13/13 - 0s - loss: 0.9221 - accuracy: 0.5925 - 195ms/epoch - 15ms/step
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
39/39 - 0s - loss: 1.0788 - accuracy: 0.5314 - 216ms/epoch - 6ms/step
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/3

In [31]:
#Save Results
# Create the output folder first so a missing directory cannot make the
# write fail after the long training sweeps have finished.
output_dir = Path('Outputs')
output_dir.mkdir(parents=True, exist_ok=True)

for file_name, payload in (('feature_results.json', feature_results),
                           ('missing_feature_results.json', missing_feature_results)):
    with open(output_dir / file_name, 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False, indent=4)