In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

df = pd.read_csv("teacher_feedback_examples_ru.csv", sep=";", encoding="cp1251")

X = df["comment"]
Y = df[["crit1","crit2","crit3"]]

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(
    ngram_range=(1,2),
    max_features=5000
)

X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

for col in ["crit1","crit2","crit3"]:
    print("\n====", col, "====")

    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_train_vec, y_train[col])

    preds = clf.predict(X_test_vec)
    print(classification_report(y_test[col], preds))


==== crit1 ====
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        34
           1       0.00      0.00      0.00         4

    accuracy                           0.89        38
   macro avg       0.45      0.50      0.47        38
weighted avg       0.80      0.89      0.85        38


==== crit2 ====
              precision    recall  f1-score   support

           0       0.97      1.00      0.99        37
           1       0.00      0.00      0.00         1

    accuracy                           0.97        38
   macro avg       0.49      0.50      0.49        38
weighted avg       0.95      0.97      0.96        38


==== crit3 ====
              precision    recall  f1-score   support

           0       1.00      0.13      0.24        15
           1       0.64      1.00      0.78        23

    accuracy                           0.66        38
   macro avg       0.82      0.57      0.51        38
weighted avg       0.78

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [2]:
wrong = X_test[y_test["crit1"] != preds]
print(wrong.head(10))

184                             Выйди и зайди нормально!
163                   Как ты не понимаешь! Я же понимаю!
18                                    Ты на верном пути!
15                                         Замечательно!
67                                          Так держать!
108                   Я никогда не видел ничего лучшего.
45                                     Это то, что надо!
76                                           Наконец-то!
16                                    Прекрасное начало!
132    Ты что, действительно такой умный? - Кто, я? -...
Name: comment, dtype: str


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

df = pd.read_csv("teacher_feedback_examples_ru.csv", sep=";", encoding="cp1251")

X = df["comment"]
y = df["crit3"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(
    ngram_range=(1,2),
    max_features=5000
)

X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

clf = LogisticRegression(max_iter=1000, class_weight="balanced")
clf.fit(X_train_vec, y_train)

preds = clf.predict(X_test_vec)

print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.87      0.87      0.87        15
           1       0.91      0.91      0.91        23

    accuracy                           0.89        38
   macro avg       0.89      0.89      0.89        38
weighted avg       0.89      0.89      0.89        38



In [11]:
feature_names = vectorizer.get_feature_names_out()
coefs = clf.coef_[0]

top_pos = sorted(zip(coefs, feature_names))[-20:]
top_neg = sorted(zip(coefs, feature_names))[:20]

print("Supportive:")
print(top_pos)

print("Non-supportive:")
print(top_neg)

Supportive:
[(np.float64(0.33588394534359955), 'молодец'), (np.float64(0.33588394534359955), 'необыкновенно'), (np.float64(0.33588394534359955), 'отлично'), (np.float64(0.33588394534359955), 'поражен'), (np.float64(0.33588394534359955), 'потрясающе'), (np.float64(0.33588394534359955), 'правильно'), (np.float64(0.33588394534359955), 'сенсационно'), (np.float64(0.33588394534359955), 'совершенно'), (np.float64(0.33588394534359955), 'талантливо'), (np.float64(0.33588394534359955), 'фантастика'), (np.float64(0.3516614439570385), 'работу'), (np.float64(0.37690597702943657), 'горжусь'), (np.float64(0.3898325276933381), 'тобой'), (np.float64(0.3930998205739018), 'очень'), (np.float64(0.4009899176645982), 'значительно'), (np.float64(0.4616283700611946), 'лучше'), (np.float64(0.5783011902940383), 'великолепно'), (np.float64(0.5998790687192113), 'ты'), (np.float64(0.726542121046756), 'хорошо'), (np.float64(1.3966477936366029), 'это')]
Non-supportive:
[(np.float64(-0.7188706553813826), 'чем'), (np

In [3]:
wrong = X_test[y_test["crit2"] != preds]
print(wrong.head(10))

163                   Как ты не понимаешь! Я же понимаю!
18                                    Ты на верном пути!
15                                         Замечательно!
67                                          Так держать!
108                   Я никогда не видел ничего лучшего.
45                                     Это то, что надо!
76                                           Наконец-то!
16                                    Прекрасное начало!
132    Ты что, действительно такой умный? - Кто, я? -...
60                                          Превосходно!
Name: comment, dtype: str


In [4]:
wrong = X_test[y_test["crit3"] != preds]
print(wrong.head(10))

184                             Выйди и зайди нормально!
163                   Как ты не понимаешь! Я же понимаю!
76                                           Наконец-то!
132    Ты что, действительно такой умный? - Кто, я? -...
115                            Вот этого я еще не видел.
152             Я завтра неожиданно дам вам контрольную.
137                   Здесь и так душно, а еще вы орете!
187                  Сейчас рассажу эту сладкую парочку!
150              Ты весь урок смотришь на меня затылком.
167                 Скажи всем, мы все вместе посмеемся!
Name: comment, dtype: str


In [12]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import ComplementNB
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier

df = pd.read_csv("teacher_feedback_examples_ru.csv", sep=";", encoding="cp1251")
X = df["comment"].astype(str)
y = df["crit3"].astype(int)

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

models = {
    "LogReg_balanced": LogisticRegression(max_iter=2000, class_weight="balanced"),
    "LinearSVC": LinearSVC(class_weight="balanced"),
    "ComplementNB": ComplementNB(),
    # Деревья: часто хуже на TF-IDF, но попробуем, чтобы закрыть запрос консультанта
    "RandomForest": RandomForestClassifier(n_estimators=500, random_state=42),
    # бустинг по деревьям, который умеет работать с разреженными (HistGB — на dense; тут может быть ограничение)
}

for name, clf in models.items():
    pipe = Pipeline([
        ("tfidf", TfidfVectorizer(ngram_range=(1,2), max_features=5000)),
        ("clf", clf),
    ])
    scores = cross_val_score(pipe, X, y, cv=cv, scoring="f1")
    print(f"{name:16s} F1 mean={scores.mean():.3f} std={scores.std():.3f}")

LogReg_balanced  F1 mean=0.842 std=0.018
LinearSVC        F1 mean=0.835 std=0.015
ComplementNB     F1 mean=0.749 std=0.053
RandomForest     F1 mean=0.768 std=0.013


In [1]:
import pandas as pd

results = {
    "Model": ["LogReg_balanced", "LinearSVC", "ComplementNB", "RandomForest"],
    "F1_mean": [0.842, 0.835, 0.749, 0.768],
    "F1_std": [0.018, 0.015, 0.053, 0.013]
}

results_df = pd.DataFrame(results)
results_df.sort_values("F1_mean", ascending=False)

Unnamed: 0,Model,F1_mean,F1_std
0,LogReg_balanced,0.842,0.018
1,LinearSVC,0.835,0.015
3,RandomForest,0.768,0.013
2,ComplementNB,0.749,0.053


### Results
We evaluated several classical classifiers using 5-fold stratified cross-validation on the annotated corpus (n ≈ 189).
The best performance was achieved by TF-IDF features combined with Logistic Regression (F1 = 0.84 ± 0.02). LinearSVC showed comparable results (F1 = 0.83 ± 0.02), while tree-based models (Random Forest) and Naive Bayes underperformed. The low standard deviation across folds indicates stable performance despite the relatively small dataset. Due to extreme class imbalance and insufficient positive instances, criteria 1 and 2 were excluded from reliable modeling. Only supportive tone (criterion 3) demonstrated consistent learnable signal.

### Conclusion
The experiment demonstrates that supportive tone in teacher feedback can be reliably detected using surface lexical features, even in a relatively small dataset.
Linear models outperform tree-based methods for TF-IDF text representations. The results confirm that affective and evaluative lexical markers provide strong predictive signal. Future work may include expanding the dataset and exploring contextual embeddings; however, classical models already provide robust baseline performance.