# Подготовка df

In [207]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [208]:
# Fix the legacy global NumPy seed so the synthetic dataset is reproducible.
np.random.seed(13)

subjects = ["Mathematics", "Physics", "Chemistry", "Computer Science", "Crafts", "Physical Education"]

grades = ["Satisfactory", "Good", "Excellent"]

# Collect every student as a plain list and build the DataFrame once at the
# end. Appending with `df.loc[len(df)] = ...` enlarges the frame on every
# iteration and starts from an empty frame with object-dtype columns.
# The draw order (one score per subject, then one grade, per student) is kept
# as-is so the seeded random stream produces the same data.
student_rows = []
for student_id in range(1, 2002):  # ids 1..2001 inclusive
    student_data = [student_id]
    for _ in subjects:
        # The upper bound of randint is exclusive, so scores fall in 0..99.
        student_data.append(np.random.randint(0, 100))
    # The grade is drawn independently of the scores.
    student_data.append(np.random.choice(grades))
    student_rows.append(student_data)

df = pd.DataFrame(student_rows, columns=["id"] + subjects + ["Grade"])

In [209]:
# Promote the student id column to the index so it is not treated as a feature.
df = df.set_index("id")

In [210]:
# Encode the textual grade labels as integer class ids; `le` is kept so the
# ids can be mapped back to labels later with `le.inverse_transform`.
le = LabelEncoder()
le.fit(df['Grade'])
df['Grade'] = le.transform(df['Grade'])

In [211]:
# Preview the first five rows of the prepared frame.
df.head(5)

Unnamed: 0_level_0,Mathematics,Physics,Chemistry,Computer Science,Crafts,Physical Education,Grade
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,82,48,74,16,98,25,2
2,58,99,26,66,46,54,1
3,77,74,73,75,52,2,2
4,19,21,19,11,86,75,1
5,18,24,28,11,40,76,0


In [212]:
# Summary statistics per column (quartiles spelled out; these are the defaults).
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Mathematics,Physics,Chemistry,Computer Science,Crafts,Physical Education,Grade
count,2001.0,2001.0,2001.0,2001.0,2001.0,2001.0,2001.0
mean,49.244378,49.74013,50.142429,49.754623,49.190905,47.948526,0.98051
std,28.958759,29.108357,28.33087,28.775289,28.682844,29.115268,0.815549
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,24.0,24.0,26.0,25.0,24.0,23.0,0.0
50%,50.0,50.0,51.0,49.0,49.0,47.0,1.0
75%,75.0,74.0,74.0,76.0,74.0,73.0,2.0
max,99.0,99.0,99.0,99.0,99.0,99.0,2.0


# Подготовка к обучению



In [213]:
from sklearn.model_selection import train_test_split

In [214]:
# Hold out 20% of the rows for testing; every subject column is a feature and
# Grade is the target.
features_all = df.drop(columns='Grade')
X_train, X_test, y_train, y_test = train_test_split(
    features_all,
    df['Grade'],
    test_size=0.2,
    random_state=13,
)

In [215]:
# Pairwise correlations of all columns (including Grade), shaded as a heatmap.
corr_matrix = df.corr()
corr_matrix.style.background_gradient(cmap="hot_r")

Unnamed: 0,Mathematics,Physics,Chemistry,Computer Science,Crafts,Physical Education,Grade
Mathematics,1.0,0.003702,0.027427,0.022141,0.020291,0.009407,0.013899
Physics,0.003702,1.0,-0.022546,-0.012533,-0.001059,-0.00686,0.063921
Chemistry,0.027427,-0.022546,1.0,0.015337,-0.031169,0.028189,0.010378
Computer Science,0.022141,-0.012533,0.015337,1.0,-0.011109,-0.016266,-0.02575
Crafts,0.020291,-0.001059,-0.031169,-0.011109,1.0,-0.019106,0.026835
Physical Education,0.009407,-0.00686,0.028189,-0.016266,-0.019106,1.0,-0.033313
Grade,0.013899,0.063921,0.010378,-0.02575,0.026835,-0.033313,1.0


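Features with the highest positive correlation with Grade: Physics, Crafts, Mathematics (note that all correlations with Grade are close to zero).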

In [216]:
from sklearn.ensemble import RandomForestRegressor

# Rank the subjects by random-forest feature importance and keep the top three.
# NOTE(review): y_train holds label-encoded class ids, so a regressor treats
# them as numeric values - confirm a regressor (not a classifier) is intended.
rfr = RandomForestRegressor(random_state=13).fit(X_train, y_train)

feature_importances = rfr.feature_importances_

# One row per subject, most important first; the original row index is kept.
feature_importance_df = pd.DataFrame(
    {'subjects': X_train.columns, 'importance': feature_importances}
).sort_values(by='importance', ascending=False)

top_features = feature_importance_df.head(3)
print(top_features)

           subjects  importance
0       Mathematics    0.173139
4            Crafts    0.168097
3  Computer Science    0.167477


In [217]:
# Feature subset 1: Mathematics, Crafts, Physics (same split settings as the full-feature split).
subset1_columns = ['Mathematics', 'Crafts', 'Physics']
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    df[subset1_columns],
    df['Grade'],
    test_size=0.2,
    random_state=13,
)

In [218]:
# Feature subset 2: Mathematics, Crafts, Computer Science (same split settings as the full-feature split).
subset2_columns = ['Mathematics', 'Crafts', 'Computer Science']
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    df[subset2_columns],
    df['Grade'],
    test_size=0.2,
    random_state=13,
)

# Обучение моделей

In [219]:
pip install catboost



In [220]:
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [221]:
# Model 1: CatBoost trained on X_train1 and evaluated on X_test1.
cbc1 = CatBoostClassifier(iterations=100, depth=4, learning_rate=0.1)
cbc1.fit(X_train1, y_train1, silent=True)
y_pred1 = cbc1.predict(X_test1)

# Overall accuracy plus per-class scores (average=None keeps one value per class).
accuracy = accuracy_score(y_test1, y_pred1)
precision, recall, f1 = (
    metric(y_test1, y_pred1, average=None)
    for metric in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test1, y_pred1)

print(f'Accuracy: {accuracy*100:.2f}%')
for metric_label, per_class in (('Precision', precision), ('Recall', recall), ('F1-score', f1)):
    print(f'{metric_label}: {per_class*100}%')
print('Confusion Matrix:')
print(conf_matrix)

Accuracy: 34.41%
Precision: [33.67875648 38.         32.40740741]%
Recall: [48.87218045 26.20689655 28.45528455]%
F1-score: [39.87730061 31.02040816 30.3030303 ]%
Confusion Matrix:
[[65 36 32]
 [66 38 41]
 [62 26 35]]


In [222]:
def predict_student_grade(model, subject_scores, label_encoder=None):
    """
    Predicts the grade of a student based on their scores in 6 subjects.

    Parameters:
    - model: A trained classifier exposing ``predict`` (e.g. a CatBoostClassifier).
        If the model exposes ``feature_names_`` and every one of those names is
        a subject column, only those columns are passed to the model, in that
        order. Otherwise all 6 columns are passed unchanged.
    - subject_scores: A sequence of 6 scores for the subjects in the following order:
        [Mathematics, Crafts, Physics, Chemistry, Computer Science, Physical Education]
    - label_encoder: Optional fitted encoder used to map the predicted class id
        back to a grade label. Defaults to the notebook-level ``le``.

    Returns:
    - predicted_grade: The predicted grade label for the student.

    Raises:
    - ValueError: If ``subject_scores`` does not contain exactly 6 values.
    """
    subject_columns = ['Mathematics', 'Crafts', 'Physics', 'Chemistry', 'Computer Science', 'Physical Education']
    if len(subject_scores) != len(subject_columns):
        raise ValueError(
            f"Expected {len(subject_columns)} subject scores, got {len(subject_scores)}"
        )

    student_data = pd.DataFrame([list(subject_scores)], columns=subject_columns)

    # Pass the model exactly the features it was trained on, instead of relying
    # on the column order of this frame happening to match the training data.
    trained_features = getattr(model, 'feature_names_', None)
    if (
        trained_features is not None
        and len(trained_features) > 0
        and all(name in student_data.columns for name in trained_features)
    ):
        student_data = student_data[list(trained_features)]

    # predict() may return an (n, 1) array; flatten it so a scalar class id
    # (not a 1-element array) is handed to the encoder.
    predicted_code = np.ravel(model.predict(student_data))[0]

    encoder = le if label_encoder is None else label_encoder
    predicted_grade = encoder.inverse_transform([predicted_code])[0]

    return predicted_grade

In [234]:
# Example: a student with a zero score in each of the six subjects.
student_scores = [0] * 6
predicted_student_grade = predict_student_grade(cbc1, student_scores)
print("Predicted Grade:", predicted_student_grade)

Predicted Grade: Good


  y = column_or_1d(y, warn=True)


In [224]:
# Persist the first CatBoost model to disk in CatBoost's native binary format.
model_filename = "catboost_model.cbm"
cbc1.save_model(model_filename, format="cbm")

In [225]:
# Model 2: same CatBoost settings, trained on X_train2 and evaluated on X_test2.
cbc2 = CatBoostClassifier(iterations=100, depth=4, learning_rate=0.1)
cbc2.fit(X_train2, y_train2, silent=True)
y_pred2 = cbc2.predict(X_test2)

# average=None reports precision / recall / F1 separately for each class.
accuracy2 = accuracy_score(y_test2, y_pred2)
precision2, recall2, f1_2 = [
    scorer(y_test2, y_pred2, average=None)
    for scorer in (precision_score, recall_score, f1_score)
]
conf_matrix2 = confusion_matrix(y_test2, y_pred2)

report_lines = [
    f'Accuracy: {accuracy2*100:.2f}%',
    f'Precision: {precision2*100}%',
    f'Recall: {recall2*100}%',
    f'F1-score: {f1_2*100}%',
    'Confusion Matrix:',
]
for report_line in report_lines:
    print(report_line)
print(conf_matrix2)

Accuracy: 33.17%
Precision: [32.33532934 37.19008264 30.08849558]%
Recall: [40.60150376 31.03448276 27.64227642]%
F1-score: [36.         33.83458647 28.81355932]%
Confusion Matrix:
[[54 42 37]
 [58 45 42]
 [55 34 34]]


In [226]:
import xgboost as xgb

In [227]:
# Wrap the first feature subset (train and test) in XGBoost's DMatrix container.
dtrain, dtest = (
    xgb.DMatrix(features, label=target)
    for features, target in ((X_train1, y_train1), (X_test1, y_test1))
)

In [228]:
# Booster settings for a 3-class problem. The objective is stated explicitly
# instead of relying on XGBoost choosing a multiclass objective on its own
# when only `num_class` is given; `multi:softmax` makes predict() return
# class ids directly.
param = {
    'objective': 'multi:softmax',
    'max_depth': 3,
    'eta': 0.1,  # learning rate
    'num_class': 3
}
num_round = 100  # number of boosting rounds
clf = xgb.train(param, dtrain, num_round)

In [229]:
# Accuracy of the XGBoost model on the held-out part of feature subset 1.
y_pred3 = clf.predict(dtest)
score = accuracy_score(y_test1, y_pred3)

print('Accurecy: {:.2f}%'.format(score * 100))

Accurecy: 36.16%
