# Обучение и сравнение моделей

Автор: Ревнивцев Артем Александрович
Тема ВКР: Интеллектуальная система прогнозирования потребностей в обновлении вычислительной техники.


In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
import joblib


In [None]:
# Load the ticket dataset from the CSV file (path is relative to the working
# directory) and preview the first rows as the cell output.
df = pd.read_csv(filepath_or_buffer='data/sample_tickets.csv')
df.head()


In [None]:
# Name of the label column and the feature columns, grouped by the kind of
# preprocessing they receive later on.
target_col = 'needs_upgrade'
categorical_cols = [
    'user_department',
    'device_type',
    'priority',
    'os',
    'location',
]
numeric_cols = [
    'device_age_years',
    'tickets_last_6_months',
]

# Feature frame: categorical columns first, then the numeric ones.
X = df[[*categorical_cols, *numeric_cols]]
# Label vector: the target column cast to int and taken as a NumPy array.
y = df[target_col].astype(int).to_numpy()

# Hold out 20% of the rows for testing; the split is seeded for
# reproducibility and stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)
# Last expression of the cell: sizes of the train and test parts.
(len(X_train), len(X_test))


In [None]:
# Per-column-group preprocessing: one-hot encoding for the categorical
# columns (handle_unknown='ignore' so categories unseen during fit do not
# raise at predict time) and standard scaling for the numeric columns.
categorical_transformer = Pipeline([
    ('ohe', OneHotEncoder(handle_unknown='ignore')),
])
numeric_transformer = Pipeline([
    ('scaler', StandardScaler()),
])
preprocessor = ColumnTransformer([
    ('cat', categorical_transformer, categorical_cols),
    ('num', numeric_transformer, numeric_cols),
])

# Candidate classifiers to compare, keyed by the display name used in the
# printed report and in `results`.
models = dict(
    LogisticRegression=LogisticRegression(max_iter=200),
    KNN=KNeighborsClassifier(n_neighbors=5, weights='distance'),
    RandomForest=RandomForestClassifier(n_estimators=200, random_state=42),
    GradientBoosting=GradientBoostingClassifier(random_state=42),
    ExtraTrees=ExtraTreesClassifier(n_estimators=250, random_state=42),
    MLPClassifier=MLPClassifier(
        hidden_layer_sizes=(64, 32),
        max_iter=200,
        random_state=42,
    ),
)

# Fit every candidate behind the same preprocessing step and record its
# accuracy on the held-out test part.
results = {}
for name, model in models.items():
    # NOTE: the same `preprocessor` object is placed in every pipeline, so
    # each iteration re-fits it on the training data.
    pipe = Pipeline([
        ('preprocessor', preprocessor),
        ('model', model),
    ])
    y_pred = pipe.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"Модель {name}: accuracy = {acc:.3f}")
