In [372]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [373]:
# Dependencies
import numpy as np
import pandas as pd

In [374]:
import tensorflow
tensorflow.keras.__version__

'2.2.4-tf'

In [375]:
df = pd.read_csv("Data/responses.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()


Unnamed: 0,Music,Slow songs or fast songs,Dance,Folk,Country,Classical music,Musical,Pop,Rock,Metal or Hardrock,...,Age,Height,Weight,Number of siblings,Gender,Left - right handed,Education,Only child,Village - town,House - block of flats
0,5.0,3.0,2.0,1.0,2.0,2.0,1.0,5.0,5.0,1.0,...,20.0,163.0,48.0,1.0,female,right handed,college/bachelor degree,no,village,block of flats
1,4.0,4.0,2.0,1.0,1.0,1.0,2.0,3.0,5.0,4.0,...,19.0,163.0,58.0,2.0,female,right handed,college/bachelor degree,no,city,block of flats
2,5.0,5.0,2.0,2.0,3.0,4.0,5.0,3.0,5.0,3.0,...,20.0,176.0,67.0,2.0,female,right handed,secondary school,no,city,block of flats
4,5.0,3.0,4.0,3.0,2.0,4.0,3.0,5.0,3.0,1.0,...,20.0,170.0,59.0,1.0,female,right handed,secondary school,no,village,house/bungalow
5,5.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,5.0,5.0,...,20.0,186.0,77.0,1.0,male,right handed,secondary school,no,city,block of flats


In [376]:
for col in df.columns: 
    print(col) 

Music
Slow songs or fast songs
Dance
Folk
Country
Classical music
Musical
Pop
Rock
Metal or Hardrock
Punk
Hiphop, Rap
Reggae, Ska
Swing, Jazz
Rock n roll
Alternative
Latino
Techno, Trance
Opera
Movies
Horror
Thriller
Comedy
Romantic
Sci-fi
War
Fantasy/Fairy tales
Animated
Documentary
Western
Action
History
Psychology
Politics
Mathematics
Physics
Internet
PC
Economy Management
Biology
Chemistry
Reading
Geography
Foreign languages
Medicine
Law
Cars
Art exhibitions
Religion
Countryside, outdoors
Dancing
Musical instruments
Writing
Passive sport
Active sport
Gardening
Celebrities
Shopping
Science and technology
Theatre
Fun with friends
Adrenaline sports
Pets
Flying
Storm
Darkness
Heights
Spiders
Snakes
Rats
Ageing
Dangerous dogs
Fear of public speaking
Smoking
Alcohol
Healthy eating
Daily events
Prioritising workload
Writing notes
Workaholism
Thinking ahead
Final judgement
Reliability
Keeping promises
Loss of interest
Friends versus money
Funniness
Fake
Criminal damage
Decision making
Elec

In [377]:

music_features = df[['Music', 'Slow songs or fast songs', 'Dance', 'Folk','Country','Classical music','Musical','Pop','Rock','Metal or Hardrock','Punk','Hiphop, Rap','Reggae, Ska','Swing, Jazz','Rock n roll','Alternative','Latino','Techno, Trance','Opera']]

movie_features = df[['Movies','Horror','Thriller','Comedy','Romantic','Sci-fi','War','Fantasy/Fairy tales','Animated','Documentary','Western','Action']]


hobbies_features = df[['History','Psychology','Politics','Mathematics','Physics','Internet','PC','Economy Management','Biology','Chemistry','Reading','Geography','Foreign languages','Medicine','Law','Cars','Art exhibitions','Religion','Countryside, outdoors','Dancing','Musical instruments','Writing','Passive sport','Active sport','Gardening','Celebrities','Shopping','Science and technology','Theatre','Fun with friends','Adrenaline sports','Pets']]

phobia_features = df[['Flying','Storm','Darkness','Heights','Spiders','Snakes','Rats','Ageing','Dangerous dogs','Fear of public speaking']]


health_features = df[['Smoking','Alcohol', 'Healthy eating']]

personality_opinion_features = df[['Daily events','Prioritising workload','Writing notes','Workaholism','Thinking ahead','Final judgement','Reliability',
'Keeping promises','Loss of interest','Friends versus money','Funniness','Fake','Criminal damage','Decision making','Elections','Self-criticism','Judgment calls','Hypochondria','Empathy'
,'Eating to survive','Giving','Compassion to animals','Borrowed stuff','Loneliness','Cheating in school','Health','Changing the past','God'
,'Dreams','Charity','Number of friends','Punctuality','Lying','Waiting','New environment','Mood swings','Appearence and gestures','Socializing'
,'Achievements','Responding to a serious letter','Children','Assertiveness','Getting angry','Knowing the right people','Public speaking','Unpopularity'
,'Life struggles','Happiness in life','Energy levels','Small - big dogs','Personality','Finding lost valuables','Getting up','Interests or hobbies',"Parents' advice"
,'Questionnaires or polls','Internet usage']]

spending_features = df[['Finances','Shopping centres','Branded clothing','Entertainment spending','Spending on looks','Spending on gadgets','Spending on healthy eating']]

demographic_features = df[['Age','Height','Weight','Number of siblings','Gender','Left - right handed','Education','Only child','Village - town','House - block of flats']]

education_features = df[['Finding lost valuables','Happiness in life','Achievements','Waiting','Charity','God','Cheating in school',
                         'Elections','Writing notes']]
gender_features = df [['Writing notes','Writing notes','Funniness','Criminal damage','Eating to survive','Giving','Compassion to animals','Mood swings'
               ,'Personality','Interests or hobbies','Questionnaires or polls']]

In [349]:
selected_features = df[["Weight","Height"]]

In [564]:
X = education_features
y = df["Education"]
print(X.shape, y.shape)

(674, 9) (674,)


In [601]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [602]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

In [603]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [604]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [605]:
# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [606]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [634]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim= (X.shape[1])))
#model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=6, activation='softmax'))

In [635]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [636]:
model.summary()

Model: "sequential_59"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_148 (Dense)            (None, 100)               1000      
_________________________________________________________________
dense_149 (Dense)            (None, 6)                 606       
Total params: 1,606
Trainable params: 1,606
Non-trainable params: 0
_________________________________________________________________


In [637]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=200,
    shuffle=True,
    verbose=2
)

Epoch 1/200
505/505 - 0s - loss: 1.5303 - acc: 0.5426
Epoch 2/200
505/505 - 0s - loss: 1.2452 - acc: 0.6297
Epoch 3/200
505/505 - 0s - loss: 1.1201 - acc: 0.6297
Epoch 4/200
505/505 - 0s - loss: 1.0808 - acc: 0.6297
Epoch 5/200
505/505 - 0s - loss: 1.0596 - acc: 0.6297
Epoch 6/200
505/505 - 0s - loss: 1.0474 - acc: 0.6297
Epoch 7/200
505/505 - 0s - loss: 1.0365 - acc: 0.6277
Epoch 8/200
505/505 - 0s - loss: 1.0291 - acc: 0.6277
Epoch 9/200
505/505 - 0s - loss: 1.0234 - acc: 0.6277
Epoch 10/200
505/505 - 0s - loss: 1.0163 - acc: 0.6277
Epoch 11/200
505/505 - 0s - loss: 1.0126 - acc: 0.6277
Epoch 12/200
505/505 - 0s - loss: 1.0083 - acc: 0.6277
Epoch 13/200
505/505 - 0s - loss: 1.0068 - acc: 0.6277
Epoch 14/200
505/505 - 0s - loss: 1.0006 - acc: 0.6277
Epoch 15/200
505/505 - 0s - loss: 0.9979 - acc: 0.6277
Epoch 16/200
505/505 - 0s - loss: 0.9974 - acc: 0.6277
Epoch 17/200
505/505 - 0s - loss: 0.9934 - acc: 0.6277
Epoch 18/200
505/505 - 0s - loss: 0.9901 - acc: 0.6277
Epoch 19/200
505/50

Epoch 150/200
505/505 - 0s - loss: 0.8302 - acc: 0.6772
Epoch 151/200
505/505 - 0s - loss: 0.8295 - acc: 0.6871
Epoch 152/200
505/505 - 0s - loss: 0.8279 - acc: 0.6792
Epoch 153/200
505/505 - 0s - loss: 0.8294 - acc: 0.6950
Epoch 154/200
505/505 - 0s - loss: 0.8261 - acc: 0.6752
Epoch 155/200
505/505 - 0s - loss: 0.8245 - acc: 0.6851
Epoch 156/200
505/505 - 0s - loss: 0.8237 - acc: 0.6851
Epoch 157/200
505/505 - 0s - loss: 0.8234 - acc: 0.6832
Epoch 158/200
505/505 - 0s - loss: 0.8227 - acc: 0.6812
Epoch 159/200
505/505 - 0s - loss: 0.8227 - acc: 0.6812
Epoch 160/200
505/505 - 0s - loss: 0.8216 - acc: 0.6950
Epoch 161/200
505/505 - 0s - loss: 0.8197 - acc: 0.6891
Epoch 162/200
505/505 - 0s - loss: 0.8196 - acc: 0.6792
Epoch 163/200
505/505 - 0s - loss: 0.8182 - acc: 0.6911
Epoch 164/200
505/505 - 0s - loss: 0.8178 - acc: 0.6832
Epoch 165/200
505/505 - 0s - loss: 0.8174 - acc: 0.6911
Epoch 166/200
505/505 - 0s - loss: 0.8165 - acc: 0.6832
Epoch 167/200
505/505 - 0s - loss: 0.8151 - acc:

<tensorflow.python.keras.callbacks.History at 0x25ead057c88>

In [638]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

169/169 - 0s - loss: 1.1099 - acc: 0.6450
Deep Neural Network - Loss: 1.1098718452735765, Accuracy: 0.6449704170227051


In [639]:
encoded_predictions = model.predict_classes(X_test_scaled[:10])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [640]:
predictions = model.predict_classes(X_test_scaled)
prediction_labels = label_encoder.inverse_transform(predictions)
pd.DataFrame({"Prediction": prediction_labels, "Actual": y_test}).head(50)

Unnamed: 0,Prediction,Actual
114,secondary school,secondary school
584,secondary school,masters degree
119,secondary school,secondary school
518,secondary school,secondary school
727,secondary school,secondary school
312,secondary school,primary school
1003,secondary school,secondary school
633,secondary school,secondary school
1004,secondary school,secondary school
742,secondary school,masters degree


In [641]:
nn_education = model
nn_education

<tensorflow.python.keras.engine.sequential.Sequential at 0x25eabfb8128>

In [642]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'nn_education.sav'
joblib.dump(nn_education, filename)

TypeError: can't pickle _thread.RLock objects