In [2]:
import numpy as np
import random
import string
import pandas as pd

# Regular scores are drawn from a normal distribution (mean 180, std 10) and capped at 200; no lower bound is applied.
# Number of privileged students in each generated cohort:
AMOUNT_OF_PRIVILEGED_STUDENTS = 15

def generate_random_name(length):
    """Return a random string made of `length` lowercase ASCII letters.

    Letters are drawn one at a time with ``random.choice``, so the result
    depends on the state of the global ``random`` generator. A `length`
    of zero or less yields an empty string.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def generate_students(n_students=1500, n_privileged=None):
    """Generate one shuffled cohort of synthetic exam results.

    MATH, EN and UA scores of regular students are drawn from a normal
    distribution with mean 180 and standard deviation 10. The first
    ``n_privileged`` rows are then redrawn with mean 150 and standard
    deviation 20 and flagged as privileged. Every score is capped at 200;
    no lower bound is applied.

    Args:
        n_students: Number of rows to generate. Defaults to 1500.
        n_privileged: Number of privileged students. Defaults to the
            module-level ``AMOUNT_OF_PRIVILEGED_STUDENTS`` (with the default
            cohort size that is 15 of 1500, i.e. 1% of the cohort).

    Returns:
        A float array of shape ``(n_students, 6)`` whose rows are in random
        order and whose columns are
        ``[MATH, EN, UA, privileged flag (0/1), student id, 0]``.
        The last column is left at zero here.

    Raises:
        ValueError: If ``n_students`` is negative or ``n_privileged`` is not
            in ``[0, n_students]``.
    """
    if n_privileged is None:
        n_privileged = AMOUNT_OF_PRIVILEGED_STUDENTS
    if n_students < 0 or not 0 <= n_privileged <= n_students:
        raise ValueError(
            "need 0 <= n_privileged <= n_students, got "
            f"n_privileged={n_privileged}, n_students={n_students}"
        )

    # The draw order (regular MATH/EN/UA, then privileged MATH/EN/UA) is kept
    # unchanged so a seeded run still produces the same cohort.
    students_MATH = np.random.normal(180, 10, n_students)
    students_EN = np.random.normal(180, 10, n_students)
    students_UA = np.random.normal(180, 10, n_students)
    student_id = np.arange(n_students)

    # Privileged students occupy the first rows (before shuffling) and get
    # lower, more spread-out scores.
    students_MATH[:n_privileged] = np.random.normal(150, 20, n_privileged)
    students_EN[:n_privileged] = np.random.normal(150, 20, n_privileged)
    students_UA[:n_privileged] = np.random.normal(150, 20, n_privileged)

    # Cap every score at the maximum mark of 200.
    for scores in (students_MATH, students_EN, students_UA):
        scores[scores > 200] = 200

    # Privilege flag: 1 for privileged students, 0 for everyone else.
    students_PRIV = np.zeros(n_students)
    students_PRIV[:n_privileged] = 1

    # Assemble the n_students x 6 matrix; column 5 stays zero.
    students = np.zeros((n_students, 6))
    students[:, 0] = students_MATH
    students[:, 1] = students_EN
    students[:, 2] = students_UA
    students[:, 3] = students_PRIV
    students[:, 4] = student_id

    # Shuffle rows so privileged students are not grouped at the top.
    np.random.shuffle(students)
    return students


def exam_mark(results):
    """Return the weighted exam mark for one student.

    The first three entries of `results` are weighted 0.4, 0.3 and 0.3
    respectively; any further entries are ignored.
    """
    first, second, third = results[0], results[1], results[2]
    return 0.4 * first + 0.3 * second + 0.3 * third


def priv_student_good(results):
    """Tell whether a privileged student clears the privileged admission bar.

    All of these must hold: the weighted exam mark is above 144, each of the
    three scores ``results[0..2]`` is above 120, and the privilege flag
    ``results[3]`` equals 1.
    """
    weighted_ok = exam_mark(results) > 144
    return (
        weighted_ok
        and results[0] > 120
        and results[1] > 120
        and results[2] > 120
        and results[3] == 1
    )

def non_priv_student_good(results):
    """Tell whether a non-privileged student clears the regular admission bar.

    The weighted exam mark must be above 160 and the privilege flag
    ``results[3]`` must equal 0.
    """
    weighted_ok = exam_mark(results) > 160
    return weighted_ok and results[3] == 0

def array_to_excel(array, path='students.xlsx'):
    """Write `array` to an Excel workbook without a header row or index column.

    Args:
        array: Data accepted by ``pd.DataFrame`` (for example a 2-D NumPy
            array of students).
        path: Destination file. Defaults to ``'students.xlsx'``, the name
            that was previously hard-coded, so existing calls are unaffected.
    """
    pd.DataFrame(array).to_excel(path, header=False, index=False)


def choose_students(students, total_places=350, max_privileged=35):
    """Mark the admitted students of one cohort in column 5.

    Privileged students who satisfy ``priv_student_good`` are admitted first,
    best weighted mark first, up to ``max_privileged``. The remaining places
    (``total_places`` minus the admitted privileged students) go to students
    who satisfy ``non_priv_student_good``, again best mark first.

    Args:
        students: 2-D NumPy array with columns
            ``[MATH, EN, UA, privileged flag, student id, admitted flag]``.
            It is modified in place.
        total_places: Total number of places available. Defaults to 350.
        max_privileged: Upper bound on admitted privileged students.
            Defaults to 35.

    Returns:
        The same ``students`` array, with column 5 set to 1 for every row
        whose id (column 4) belongs to an admitted student. Column 5 of the
        other rows is left as it was.
    """
    # Best-first ranking. The +200 bonus on the privilege flag puts privileged
    # rows ahead of regular ones; sorted() is stable, so ties keep input order.
    ranked = sorted(
        students,
        key=lambda row: exam_mark(row) + 200 * row[3],
        reverse=True,
    )

    admitted_priv = [row for row in ranked if priv_student_good(row)][:max_privileged]

    # Clamp at zero: a negative slice bound would drop rows from the end
    # instead of admitting nobody.
    places_left = max(total_places - len(admitted_priv), 0)
    admitted_regular = [row for row in ranked if non_priv_student_good(row)][:places_left]

    # Collect ids first, then flag all matching rows with a single mask
    # instead of scanning the whole array once per admitted student.
    admitted_ids = [row[4] for row in admitted_priv + admitted_regular]
    students[np.isin(students[:, 4], admitted_ids), 5] = 1

    return students

    

# Generate one cohort and run the selection on it; as the last expression of
# the cell, the returned array is what the notebook displays below.
choose_students(generate_students())


array([[1.87385132e+02, 1.71591024e+02, 1.73257960e+02, 0.00000000e+00,
        9.49000000e+02, 0.00000000e+00],
       [1.77869467e+02, 1.76342195e+02, 1.84451173e+02, 0.00000000e+00,
        4.07000000e+02, 0.00000000e+00],
       [1.99366183e+02, 1.69539639e+02, 1.89954237e+02, 0.00000000e+00,
        1.01000000e+02, 1.00000000e+00],
       ...,
       [1.87203952e+02, 1.69945375e+02, 1.91572259e+02, 0.00000000e+00,
        1.13700000e+03, 0.00000000e+00],
       [1.73791673e+02, 1.76668596e+02, 1.79175231e+02, 0.00000000e+00,
        1.13300000e+03, 0.00000000e+00],
       [1.81803324e+02, 1.79291169e+02, 1.75854247e+02, 0.00000000e+00,
        8.33000000e+02, 0.00000000e+00]])

In [63]:
import tensorflow as tf

# Number of independently generated cohorts in each split.
train_size = 100
test_size = 20

# Training split: features are columns 0-3 (three scores + privilege flag),
# the label is column 5 (admission decision).
generated = [choose_students(generate_students()) for _ in range(train_size)]

x_train = [cohort[:, 0:4] for cohort in generated]
y_train = [cohort[:, 5] for cohort in generated]

# Test split: built the same way from freshly generated cohorts.
generated_test = [choose_students(generate_students()) for _ in range(test_size)]

x_test = [cohort[:, 0:4] for cohort in generated_test]
y_test = [cohort[:, 5] for cohort in generated_test]





In [66]:
# Stack the generated cohorts into NumPy arrays; only the labels are cast (to int32), the features keep their float dtype.
import keras

# Stack the per-cohort lists: x becomes (cohorts, students, features),
# y becomes (cohorts, students).
x_train_data, y_train_data = np.array(x_train), np.array(y_train)

x_test_data, y_test_data = np.array(x_test), np.array(y_test).astype('int32')

print(x_train_data.shape, y_train_data.shape, sep='\n')

# Flatten the cohorts so that every student is one row of 4 features.
x_train_data = x_train_data.reshape((train_size * 1500, 4))

print(x_train_data)

# Labels become an int32 column vector aligned with the feature rows.
y_train_data = y_train_data.astype('int32').reshape((train_size * 1500, 1))

# Model: two 1000-unit ReLU layers followed by a single sigmoid output.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(1000, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(1000, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.fit(x_train_data, y_train_data, batch_size=50, epochs=20)

model.summary()

# Save the trained model and reload it from disk as `network`.
model.save('model.h5')

network = tf.keras.models.load_model('model.h5')



(100, 1500, 4)
(100, 1500)
[[172.97130761 183.70429831 169.35319288   0.        ]
 [193.21238311 185.15560033 181.91080721   0.        ]
 [174.97383672 187.51059458 170.73369511   0.        ]
 ...
 [176.74093289 184.23658929 171.00937631   0.        ]
 [181.8215644  180.30955175 176.85820582   0.        ]
 [187.09157378 196.19726268 181.20689092   0.        ]]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_39"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_94 (Dense)            (None, 1000)              5000      
                                                                 
 dense_95 (Dense)            (None, 1000)              1001000   
                                                     

In [75]:

# Flatten all test cohorts into rows of 4 features.
x_test_data = x_test_data.reshape((test_size * 1500, 4))

# Score only the first test cohort (its 1500 rows come first after flattening).
predictions = network.predict(x_test_data[:1500])

# Ground-truth labels, one row per cohort.
expected = y_test_data.reshape((test_size, 1500))

# Excel export for the first test cohort: marks, privilege flag, expected
# label and the model output, side by side.
df = pd.DataFrame(generated_test[0][:, :4])
df['expected'], df['predicted'] = expected[0], predictions
df.to_excel('test.xlsx', index=False, header=False)




