In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the dataset and drop every row that has a missing value in any column
df = pd.read_csv("Most popular 1000 Youtube videos.csv")
df = df.dropna()

# Build the feature matrix. The count columns are handled as strings that may
# contain comma thousands separators, so the commas are stripped before casting.
feature_columns = ['Video views', 'Likes', 'Dislikes']
X = df[feature_columns].copy()
for column in feature_columns:
    # regex=False: treat ',' as a literal regardless of the pandas default.
    # 'int64' instead of plain `int`: `int` resolves to a 32-bit integer on
    # Windows with NumPy < 2 and overflows for counts above 2**31 - 1.
    X[column] = X[column].str.replace(',', '', regex=False).astype('int64')

# Encode the category labels (text) as integer class ids
label_encoder = LabelEncoder()
label_encoder.fit(df['Category'])
y = label_encoder.transform(df['Category'])

# Split the data: 20% of the rows go to the test set, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scale the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## 1. Stacking model
# Base learners: a depth-limited decision tree and an RBF-kernel SVM
tree_learner = DecisionTreeClassifier(max_depth=5, random_state=42)
svm_learner = SVC(kernel='rbf', probability=True, random_state=42)
estimators = [('dt', tree_learner), ('svm', svm_learner)]

# Logistic regression combines the base learners' outputs (5-fold CV)
meta_learner = LogisticRegression(max_iter=1000)
stacking_model = StackingClassifier(
    estimators=estimators,
    final_estimator=meta_learner,
    cv=5,
)
stacking_model.fit(X_train_scaled, y_train)

# Evaluate on the held-out test split
y_pred_stack = stacking_model.predict(X_test_scaled)
stack_accuracy = accuracy_score(y_test, y_pred_stack)
print(f"Stacking Accuracy: {stack_accuracy:.4f}")

## 2. Multilayer perceptron
# Two hidden layers (100 and 50 units), ReLU activation, Adam solver,
# early stopping enabled, fixed seed
mlp_settings = dict(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=500,
    random_state=42,
    early_stopping=True,
)
mlp = MLPClassifier(**mlp_settings)
mlp.fit(X_train_scaled, y_train)

# Evaluate on the held-out test split
y_pred_mlp = mlp.predict(X_test_scaled)
mlp_accuracy = accuracy_score(y_test, y_pred_mlp)
print(f"MLP Accuracy: {mlp_accuracy:.4f}")

## Model comparison
print("\nСравнение моделей:")
print(f"- Stacking Classifier: {stack_accuracy:.4f}")
print(f"- MLP Classifier: {mlp_accuracy:.4f}")

# Pick the verdict line first, then print it once; a tie (neither accuracy
# strictly greater) falls through to the "equal accuracy" message
if stack_accuracy > mlp_accuracy:
    verdict = "\nЛучшая модель: Stacking Classifier"
elif mlp_accuracy > stack_accuracy:
    verdict = "\nЛучшая модель: MLP Classifier"
else:
    verdict = "\nМодели показали одинаковую точность"
print(verdict)

Stacking Accuracy: 0.4369
MLP Accuracy: 0.3495

Сравнение моделей:
- Stacking Classifier: 0.4369
- MLP Classifier: 0.3495

Лучшая модель: Stacking Classifier


