In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Input, Dense, Concatenate
from keras.models import Model
from tensorflow import keras

In [15]:
data = pd.read_csv('FULLUwi.csv')

In [16]:
data.head()

Unnamed: 0,Subject,CourseCode,CourseType,CourseType1,Semester,Year,Faculty,Level,Location,Lecturer,Students,Seats,Room,Attribute
0,ACCT,1005,E,E11,1,2021/2022,FSS,1,Mona - Weekend,"Paul, Dwayney (Primary)",63,100,ONLINE,
1,ACCT,1005,M,M11,1,2021/2022,FSS,1,Mona,"Thomas-Stone, Joan (Primary)",160,160,ONLINE,
2,ACCT,1005,M,M12,1,2021/2022,FSS,1,Mona,"Paul, Dwayney (Primary)",156,160,ONLINE,
3,ACCT,1005,M,M13,1,2021/2022,FSS,1,Mona,"Falconer, Debbie (Primary)",159,160,ONLINE,
4,ACCT,1005,M,M14,1,2021/2022,FSS,1,Mona,"Thomas-Stone, Joan (Primary)",160,160,ONLINE,


In [17]:
list_features = data.columns
print('They are',len(list_features),'features in the dataset.')
print('----------------')
for f in list_features:
    print('feature:', f, '|| Type:', type(data[f][0]), '|| Example:', data[f][0], '|| number of unique values', len(data[f].unique()) )

They are 14 features in the dataset.
----------------
feature: Subject || Type: <class 'str'> || Example: ACCT || number of unique values 194
feature: CourseCode || Type: <class 'numpy.int64'> || Example: 1005 || number of unique values 1256
feature: CourseType || Type: <class 'str'> || Example: E || number of unique values 17
feature: CourseType1 || Type: <class 'str'> || Example: E11 || number of unique values 477
feature: Semester || Type: <class 'numpy.int64'> || Example: 1 || number of unique values 4
feature: Year || Type: <class 'str'> || Example: 2021/2022 || number of unique values 5
feature: Faculty || Type: <class 'str'> || Example: FSS || number of unique values 36
feature: Level || Type: <class 'str'> || Example: 1 || number of unique values 11
feature: Location || Type: <class 'str'> || Example: Mona - Weekend || number of unique values 18
feature: Lecturer || Type: <class 'str'> || Example: Paul, Dwayney (Primary)  || number of unique values 4946
feature: Students || Typ

In [18]:
data.isnull().sum()

Subject            0
CourseCode         0
CourseType         2
CourseType1        0
Semester           0
Year               0
Faculty          718
Level              0
Location           0
Lecturer        4247
Students           0
Seats              0
Room             718
Attribute      50160
dtype: int64

In [19]:
def extraLec(name):
  if len(name.split())>3:
    return 'Yes'
  else:
    return 'No'

In [20]:
values = {'CourseType':'None','Faculty':'UN','Lecturer':'UN','Room':'UN','Attribute':'None'}
data= data.fillna(value=values)

In [21]:
column = ['Attribute']
dataframe = data.loc[:,~data.columns.isin(column)].copy()
labels = dataframe.pop("Students")
dataset = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
dataset = dataset.shuffle(buffer_size=len(dataframe))
feature_space = tf.keras.utils.FeatureSpace(
    features={
        "Subject": "string_categorical",
        "CourseCode": "integer_categorical",
        "CourseType": "string_categorical",
        "CourseType1": "string_categorical",
        "Semester": "integer_categorical",
        "Year": "string_categorical",
        "Faculty": "string_categorical",
        "Level": "string_categorical",
        "Location": "string_categorical",
        "Lecturer": "string_categorical",
        "Room": "string_categorical",
        "Seats": "float_normalized",
    },
    output_mode="concat",
)
dataset = tf.data.Dataset.from_tensor_slices(dict(dataframe))
feature_space.adapt(dataset)

# You can call the FeatureSpace on a dict of data (batched or unbatched).
output_vector = feature_space(dict(dataframe))

In [22]:
X_train = output_vector[:40000]
X_test = output_vector[40000:]
y_train = data['Students'][:40000]
y_test = data['Students'][40000:]

In [23]:
tf.random.set_seed(42)
model = keras.models.Sequential([
 keras.layers.Dense(75, activation="relu",  input_shape=X_train.shape[1:]),
 keras.layers.Dense(20, activation="relu"),
 keras.layers.Dense(5, activation="relu"),
 keras.layers.Dense(1)
])
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
 restore_best_weights=True)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_model.h5", save_best_only=True)
model.compile(loss="mean_absolute_error", optimizer = 'sgd')
history = model.fit(X_train, y_train, epochs=100,
 validation_data=(X_test, y_test), callbacks=[early_stopping_cb,model_checkpoint_cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


In [24]:
prediction = model.predict(output_vector)
data['PreStudents'] = prediction
data.to_csv('PTest.csv',index=False)

