## Model Building

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('../datasets/final_admission_data.csv')
df.head()

Unnamed: 0,researchExp,industryExp,toeflScore,greV,greQ,greA,univName,cgpaNorm
0,0,66,94.0,146.0,157.0,3.0,Worcester Polytechnic Institute,0.7828
1,0,0,81.0,148.0,161.0,2.5,Worcester Polytechnic Institute,0.57
2,0,0,104.0,150.0,161.0,4.5,Worcester Polytechnic Institute,0.622
3,0,0,95.0,147.0,156.0,3.0,Worcester Polytechnic Institute,0.52
4,0,0,101.0,152.0,158.0,3.0,Worcester Polytechnic Institute,0.64


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
import warnings
warnings.filterwarnings('ignore')
from keras import Sequential
from keras.layers import Dense, Dropout
from keras.utils import np_utils

In [4]:
y = df['univName']                 # Name of the University is the target
X = df.drop(['univName'], axis=1)  # Remove University name to training data

In [5]:
X.shape, y.shape 

((20864, 7), (20864,))

#### Converting University names into different categories

In [6]:
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)

In [7]:
Y = np_utils.to_categorical(encoded_Y)

#### Oversampling imbalanced data

In [8]:
smote = SMOTE(k_neighbors=3)

In [9]:
X1, Y1 = smote.fit_resample(X, Y)

#### Scaling input data

In [10]:
scaler = StandardScaler()

In [11]:
X_transformed = scaler.fit_transform(X1)

#### Splitting into training and testing data

In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(X_transformed,Y1,test_size=0.2,random_state=42,shuffle=True)

In [13]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((80537, 7), (20135, 7), (80537, 52), (20135, 52))

#### Creating Keras neural network

In [14]:
model = Sequential(name="recommender")
model.add(Dense(256, input_dim = X_test.shape[1], activation = 'relu')) # Rectified Linear Unit Activation Function
model.add(Dense(512, activation = 'relu'))
model.add(Dropout(0.1))
model.add(Dense(512, activation = 'relu'))
model.add(Dropout(0.1))
model.add(Dense(512, activation = 'relu'))
model.add(Dropout(0.1))
model.add(Dense(256, activation = 'relu'))
model.add(Dense(Y_test.shape[1], activation = 'softmax')) # Softmax for multi-class classification
# Compile model
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
model.summary()

Model: "recommender"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               2048      
_________________________________________________________________
dense_1 (Dense)              (None, 512)               131584    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               

In [15]:
model.fit(X_train, Y_train, epochs=150, batch_size=128, verbose=2)

Epoch 1/150
630/630 - 11s - loss: 3.4139 - accuracy: 0.1217
Epoch 2/150
630/630 - 9s - loss: 3.1180 - accuracy: 0.1894
Epoch 3/150
630/630 - 11s - loss: 2.9153 - accuracy: 0.2329
Epoch 4/150
630/630 - 11s - loss: 2.7452 - accuracy: 0.2719
Epoch 5/150
630/630 - 11s - loss: 2.6022 - accuracy: 0.3009
Epoch 6/150
630/630 - 11s - loss: 2.4815 - accuracy: 0.3279
Epoch 7/150
630/630 - 11s - loss: 2.3765 - accuracy: 0.3506
Epoch 8/150
630/630 - 12s - loss: 2.2882 - accuracy: 0.3703
Epoch 9/150
630/630 - 11s - loss: 2.2135 - accuracy: 0.3882
Epoch 10/150
630/630 - 11s - loss: 2.1468 - accuracy: 0.4023
Epoch 11/150
630/630 - 11s - loss: 2.0866 - accuracy: 0.4148
Epoch 12/150
630/630 - 11s - loss: 2.0396 - accuracy: 0.4250
Epoch 13/150
630/630 - 11s - loss: 1.9912 - accuracy: 0.4360
Epoch 14/150
630/630 - 11s - loss: 1.9424 - accuracy: 0.4455
Epoch 15/150
630/630 - 11s - loss: 1.9118 - accuracy: 0.4536
Epoch 16/150
630/630 - 11s - loss: 1.8766 - accuracy: 0.4625
Epoch 17/150
630/630 - 11s - loss:

Epoch 135/150
630/630 - 11s - loss: 1.1154 - accuracy: 0.6407
Epoch 136/150
630/630 - 11s - loss: 1.1144 - accuracy: 0.6407
Epoch 137/150
630/630 - 11s - loss: 1.1100 - accuracy: 0.6406
Epoch 138/150
630/630 - 11s - loss: 1.1134 - accuracy: 0.6410
Epoch 139/150
630/630 - 11s - loss: 1.1152 - accuracy: 0.6417
Epoch 140/150
630/630 - 11s - loss: 1.1093 - accuracy: 0.6411
Epoch 141/150
630/630 - 11s - loss: 1.1075 - accuracy: 0.6410
Epoch 142/150
630/630 - 11s - loss: 1.1015 - accuracy: 0.6429
Epoch 143/150
630/630 - 11s - loss: 1.0994 - accuracy: 0.6442
Epoch 144/150
630/630 - 11s - loss: 1.1031 - accuracy: 0.6437
Epoch 145/150
630/630 - 11s - loss: 1.0959 - accuracy: 0.6458
Epoch 146/150
630/630 - 11s - loss: 1.0964 - accuracy: 0.6432
Epoch 147/150
630/630 - 12s - loss: 1.0923 - accuracy: 0.6466
Epoch 148/150
630/630 - 13s - loss: 1.0929 - accuracy: 0.6451
Epoch 149/150
630/630 - 11s - loss: 1.0961 - accuracy: 0.6462
Epoch 150/150
630/630 - 11s - loss: 1.0878 - accuracy: 0.6482


<keras.callbacks.History at 0x1e04fd90d30>

In [16]:
# researchExp, industryExp, toeflScore, greV, greQ, greA, cgpaNorm
test_data = scaler.transform([[0, 0, 113, 158, 166, 4.5, 0.8]])
print(test_data)

[[-0.13438372 -0.29629694  0.86058662  0.37686211  0.71623673  1.45314144
   0.14748754]]


In [17]:
pred = model.predict(test_data)[0]
print(pred)

[5.0733128e-04 1.3313101e-28 1.5352349e-03 2.1607613e-07 4.8508685e-08
 8.7651533e-06 4.8379252e-11 1.9158901e-03 2.1868270e-06 4.5879133e-15
 8.5450913e-10 1.1079310e-04 6.0293922e-04 2.4318374e-06 1.6041174e-11
 2.3183487e-04 2.0010530e-07 1.4473535e-06 1.6581333e-06 6.8639294e-04
 4.8307041e-04 5.9673341e-04 2.3056092e-03 2.9021254e-09 4.5839140e-12
 3.5943824e-06 8.6700701e-04 3.0950662e-08 4.1985736e-08 2.1865057e-10
 1.8547983e-04 5.3417502e-04 4.2225682e-04 1.2011651e-05 5.5157818e-07
 3.2573373e-06 2.0596630e-05 3.4074444e-01 2.6888584e-04 1.6050921e-09
 2.9180672e-05 8.1093721e-03 2.4370567e-03 2.7055587e-05 4.7604324e-05
 3.4384776e-04 2.9261793e-05 4.0840292e-05 4.5292565e-01 1.8395498e-01
 1.4204989e-16 1.0274788e-12]


In [18]:
def top_recommendations(n, pred):
  recommendations = []
  for i in range(n):
    pred_class = np.argmax(pred, axis=-1)
    recommendations.append(pred_class)
    pred[pred_class] = -1
  return encoder.inverse_transform(recommendations)

r_unis = top_recommendations(3, pred)
print(r_unis)

['University of Wisconsin Madison' 'University of Michigan Ann Arbor'
 'Virginia Tech']


#### Exporting Assets

In [19]:
import pickle

with open('../models/label_encoder.pickle', 'wb') as file:
    pickle.dump(encoder, file)

with open('../models/std_scaler.pickle', 'wb') as file:
    pickle.dump(scaler, file)

In [20]:
model.save('../models/uni_recommender_150')

INFO:tensorflow:Assets written to: ../models/uni_recommender_150\assets


---