In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
train.head()

Unnamed: 0.1,Unnamed: 0,gender,ethnicity,parental level of education,lunch,test preparation course,reading score,writing score,math score
0,0,male,group C,some college,standard,none,61,62,61
1,1,female,group C,associate's degree,standard,none,62,53,53
2,2,female,group C,some college,free/reduced,completed,75,70,67
3,3,male,group C,some high school,free/reduced,none,76,65,79
4,4,male,group A,high school,free/reduced,none,58,44,53


In [4]:
train.drop('Unnamed: 0', axis = 1 , inplace= True)

In [5]:
from sklearn import preprocessing 
label_encoder = preprocessing.LabelEncoder()

In [6]:
train['test preparation course']= label_encoder.fit_transform(train['test preparation course']) 
train['gender']= label_encoder.fit_transform(train['gender'])
train['lunch']= label_encoder.fit_transform(train['lunch'])
train['ethnicity']= label_encoder.fit_transform(train['ethnicity']) 
train['parental level of education']= label_encoder.fit_transform(train['parental level of education'])

In [7]:
scaler = preprocessing.MinMaxScaler() 
train['reading score'] = scaler.fit_transform(train[['reading score']]) 
train['writing score'] = scaler.fit_transform(train[['writing score']]) 

In [8]:
train.head()

Unnamed: 0,gender,ethnicity,parental level of education,lunch,test preparation course,reading score,writing score,math score
0,1,2,4,1,1,0.493506,0.552941,61
1,0,2,0,1,1,0.506494,0.447059,53
2,0,2,4,0,0,0.675325,0.647059,67
3,1,2,5,0,1,0.688312,0.588235,79
4,1,0,2,0,1,0.454545,0.341176,53


In [9]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [10]:
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

In [11]:
x = train[['gender','ethnicity','parental level of education','lunch','test preparation course','reading score','writing score']]

In [12]:
y = train['math score']

In [13]:
from sklearn.model_selection import train_test_split 

In [14]:
X_train,X_test,Y_train,Y_test = train_test_split(x,y,test_size = 0.15,random_state = 1)

In [15]:
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_absolute_error',
        metrics=['mean_absolute_error'])
    return model

In [16]:
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_error',
    max_trials=10,
    executions_per_trial=3,
    directory='project1',
    project_name='Exam mark1')

INFO:tensorflow:Reloading Oracle from existing project project1\Exam mark1\oracle.json
INFO:tensorflow:Reloading Tuner from project1\Exam mark1\tuner0.json


In [17]:
tuner.search(X_train, Y_train,
             epochs=5,
             validation_data=(X_test, Y_test))

INFO:tensorflow:Oracle triggered exit


In [18]:
tuner.results_summary()

Results summary
Results in project1\Exam mark1
Showing 10 best trials
Objective(name='val_mean_absolute_error', direction='min')
Trial summary
Hyperparameters:
num_layers: 7
units_0: 192
units_1: 416
learning_rate: 0.01
units_2: 64
units_3: 480
units_4: 288
units_5: 96
units_6: 192
units_7: 448
units_8: 288
units_9: 160
units_10: 320
units_11: 288
units_12: 480
units_13: 64
units_14: 512
units_15: 32
units_16: 256
units_17: 96
units_18: 480
Score: 6.468705177307129
Trial summary
Hyperparameters:
num_layers: 11
units_0: 512
units_1: 64
learning_rate: 0.01
units_2: 384
units_3: 320
units_4: 256
units_5: 192
units_6: 320
units_7: 160
units_8: 256
units_9: 32
units_10: 32
Score: 6.766413847605388
Trial summary
Hyperparameters:
num_layers: 9
units_0: 192
units_1: 448
learning_rate: 0.001
units_2: 32
units_3: 32
units_4: 32
units_5: 32
units_6: 32
units_7: 32
units_8: 32
Score: 7.060409704844157
Trial summary
Hyperparameters:
num_layers: 9
units_0: 96
units_1: 480
learning_rate: 0.001
units_

In [19]:
X_train.shape

(595, 7)

In [20]:
model = Sequential()
model.add(Dense(64, input_dim=7, activation='relu'))
model.add(Dense(480, input_dim= 64, activation='relu'))
model.add(Dense(288, input_dim= 480, activation='relu'))
model.add(Dense(96, input_dim= 288, activation='relu'))
model.add(Dense(192, input_dim= 96, activation='relu'))
model.add(Dense(448, input_dim= 192, activation='relu'))
model.add(Dense(288, input_dim= 448, activation='relu'))
model.add(Dense(160, input_dim= 288, activation='relu'))
model.add(Dense(320, input_dim= 160, activation='relu'))
model.add(Dense(288, input_dim= 320, activation='relu'))
model.add(Dense(480, input_dim= 288, activation='relu'))
model.add(Dense(64, input_dim= 480, activation='relu'))
model.add(Dense(512, input_dim= 64, activation='relu'))
model.add(Dense(32, input_dim= 512, activation='relu'))
model.add(Dense(256, input_dim= 32, activation='relu'))
model.add(Dense(96, input_dim= 256, activation='relu'))
model.add(Dense(480, input_dim= 96, activation='relu'))
model.add(Dense(1, activation='linear'))

In [21]:
keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(loss='mean_squared_error', optimizer='RMSprop', metrics=['mean_absolute_percentage_error'])

In [22]:
from keras import callbacks 
earlystopping = callbacks.EarlyStopping(monitor ="val_loss",  
                                        mode ="min", patience = 5,  
                                        restore_best_weights = True) 

In [23]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 64)                512       
_________________________________________________________________
dense_4 (Dense)              (None, 480)               31200     
_________________________________________________________________
dense_5 (Dense)              (None, 288)               138528    
_________________________________________________________________
dense_6 (Dense)              (None, 96)                27744     
_________________________________________________________________
dense_7 (Dense)              (None, 192)               18624     
_________________________________________________________________
dense_8 (Dense)              (None, 448)               86464     
_________________________________________________________________
dense_9 (Dense)              (None, 288)              

In [24]:
history = model.fit(X_train, Y_train, epochs=50, batch_size= 1,validation_split=0.15,validation_data=None,verbose=1,callbacks =[earlystopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


In [25]:
test.head()

Unnamed: 0.1,Unnamed: 0,gender,ethnicity,parental level of education,lunch,test preparation course,reading score,writing score
0,0,female,group C,some college,standard,none,67,72
1,1,male,group E,some college,standard,none,60,59
2,2,female,group C,bachelor's degree,standard,none,93,95
3,3,male,group D,associate's degree,standard,none,48,46
4,4,female,group B,associate's degree,standard,none,76,80


In [26]:
test.drop('Unnamed: 0', axis =1, inplace = True)

In [27]:
test['test preparation course']= label_encoder.fit_transform(test['test preparation course']) 
test['gender']= label_encoder.fit_transform(test['gender'])
test['lunch']= label_encoder.fit_transform(test['lunch'])
test['ethnicity']= label_encoder.fit_transform(test['ethnicity']) 
test['parental level of education']= label_encoder.fit_transform(test['parental level of education'])

In [28]:
test['reading score'] = scaler.fit_transform(test[['reading score']]) 
test['writing score'] = scaler.fit_transform(test[['writing score']]) 

In [29]:
output = model.predict(test)

In [30]:
output

array([[ 65.99905 ],
       [ 73.237045],
       [ 86.30706 ],
       [ 59.929806],
       [ 70.896286],
       [ 87.01982 ],
       [ 67.25601 ],
       [ 72.208496],
       [ 89.946686],
       [ 72.57996 ],
       [ 85.52898 ],
       [ 60.061134],
       [ 66.01727 ],
       [ 92.98656 ],
       [ 52.209454],
       [ 62.62111 ],
       [ 73.40531 ],
       [ 79.15696 ],
       [ 61.09843 ],
       [ 52.819996],
       [ 72.68642 ],
       [ 83.4048  ],
       [ 72.09285 ],
       [ 74.93468 ],
       [ 57.931675],
       [ 58.233852],
       [ 43.20313 ],
       [ 54.065434],
       [ 52.84318 ],
       [ 47.86532 ],
       [ 52.796093],
       [ 54.002457],
       [ 75.50676 ],
       [ 57.085182],
       [ 76.13634 ],
       [ 72.23854 ],
       [ 71.63377 ],
       [ 68.61684 ],
       [ 73.31572 ],
       [ 74.57116 ],
       [ 64.94887 ],
       [ 48.863632],
       [ 65.69302 ],
       [ 52.79578 ],
       [ 57.447643],
       [ 84.43791 ],
       [ 56.266895],
       [ 73.4

In [31]:
submission = pd.DataFrame(output)
submission.to_csv('outputpp.csv', index=False)