In [14]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# For preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# For evaluation
from sklearn.metrics import mean_squared_error

# Import keras components
from tensorflow import keras
from keras import layers, callbacks

In [None]:
all_train = pd.read_csv('kbtu-data-science-challenge-2025-entry-task-new/train.csv')

all_train['is_perfect'] = (all_train['final_math_score'] == 100).astype(int)

all_train.head()

Unnamed: 0,student_id,gender,age,study_hours,attendance_rate,previous_scores,parental_education,school_type,extracurricular,final_math_score,is_perfect
0,1001,Male,17,9.1,68.7,70.0,Bachelor’s,Private,3,86.2,0
1,1002,Female,17,10.5,66.6,84.8,Bachelor’s,Public,3,90.6,0
2,1003,Male,17,17.4,58.8,73.8,High School,Private,3,94.1,0
3,1004,Male,17,8.1,80.4,45.0,High School,Public,1,82.9,0
4,1005,Male,18,17.7,73.8,51.1,Master’s,Public,2,98.6,0


In [16]:
# Preprocessor setup

target = 'is_perfect'
features = ['gender', 'age', 'study_hours', 'attendance_rate', 
            'previous_scores', 'parental_education', 'school_type', 'extracurricular']

num_cols = ['age', 'study_hours', 'attendance_rate', 'previous_scores', 'extracurricular']
cat_cols = ['gender', 'parental_education', 'school_type']

numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(drop='first'))
])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, num_cols),
        ('cat', categorical_transformer, cat_cols)
    ])

In [17]:
x = preprocessor.fit_transform(all_train[features])
y = all_train[target]

x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, random_state=42)

print("Processed training shape:", x_train.shape)

Processed training shape: (3200, 9)


In [28]:
def build_classifier(input_dim):
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_dim=input_dim),
        layers.Dropout(0.01),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.01),
        layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

input_dim_class = x.shape[1]
classifier = build_classifier(input_dim_class)
classifier.summary()

In [29]:
early_stop_class = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history_class = classifier.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[early_stop_class],
    verbose=1
)

Epoch 1/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.6897 - loss: 0.5482 - val_accuracy: 0.9125 - val_loss: 0.2241
Epoch 2/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9063 - loss: 0.2312 - val_accuracy: 0.9187 - val_loss: 0.1731
Epoch 3/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9178 - loss: 0.1940 - val_accuracy: 0.9287 - val_loss: 0.1627
Epoch 4/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9077 - loss: 0.2077 - val_accuracy: 0.9262 - val_loss: 0.1705
Epoch 5/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9137 - loss: 0.1921 - val_accuracy: 0.9312 - val_loss: 0.1605
Epoch 6/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9142 - loss: 0.1974 - val_accuracy: 0.9162 - val_loss: 0.1715
Epoch 7/100
[1m100/10

In [30]:
val_loss, val_accuracy = classifier.evaluate(x_val, y_val, verbose=0)
print("Validation Accuracy:", val_accuracy)

Validation Accuracy: 0.9312499761581421


In [24]:
model_path = 'models/classifier.keras'

# classifier.save(model_path)

model = keras.models.load_model(model_path)

val_loss, val_accuracy = model.evaluate(x_val, y_val, verbose=0)
print("Validation Accuracy:", val_accuracy)

Validation Accuracy: 0.9337499737739563
