In [None]:
# Precision, recall and F1 score computed from confusion-matrix counts.
# NOTE(review): tp, fp and fn are not defined in this cell or in any cell
# visible here; on a fresh kernel this cell raises NameError unless they are
# assigned first — confirm where they are meant to come from.
# NOTE(review): there is no guard against a zero denominator
# (tp + fp == 0, tp + fn == 0, or recall + pr == 0).
pr = tp/(tp + fp)  # precision: share of predicted positives that are correct
recall = tp/(tp + fn)  # recall: share of actual positives that were found

# F1 is the harmonic mean of precision and recall.
f1 = 2 * (recall * pr)/(recall + pr)

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
# Load the prepared dataset (path is relative to the notebook's working directory).
df = pd.read_csv("../data/prepared_data_for_classification.csv")

# Binary target: 1 when a row has more than 200 views, 0 otherwise.
y = df['views'].gt(200).astype(int)

# Print the class counts so any imbalance is visible before splitting.
print(y.value_counts())
# Feature columns, grouped by kind. The groups are concatenated in this
# order, which is also the column order of X.
_base_features = [
    'duration', 'frame rate', 'likes', 'comments', 'pixel_count',
    'title_len', 'uppercase_ratio', 'emoji_count', 'has_exclamation',
]

# Binary category features.
# NOTE(review): 'category_Nonprofits & Activis' looks truncated — confirm it
# matches the CSV header exactly.
_category_features = [
    'category_Howto & Style', 'category_Music', 'category_News & Politics',
    'category_Nonprofits & Activis', 'category_People & Blogs',
    'category_Pets & Animals', 'category_Science & Technology',
    'category_Shows', 'category_Sports', 'category_Travel & Events',
]

# Binary features for the presence of text.
_text_features = ['description', 'hashtags']

feature_columns = [*_base_features, *_category_features, *_text_features]

# Feature matrix: only the selected columns of df, in the order listed above.
X = df[feature_columns]

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio the
# same in both parts; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Support vector classifier settings:
#   kernel='rbf'      — RBF kernel, chosen as the most versatile option
#   C=1.0             — penalty for errors (larger C means a smaller margin)
#   gamma='scale'     — how strongly each support vector influences the boundary
#   probability=True  — needed so that predict_proba is available for ROC-AUC
#   random_state=42   — fixed seed for repeatable results
# NOTE(review): RBF SVMs are sensitive to feature scale; confirm that the
# prepared CSV already contains standardized features.
svm = SVC(
    C=1.0,
    kernel='rbf',
    gamma='scale',
    probability=True,
    random_state=42,
).fit(X_train, y_train)

# Hard class predictions for the test set.
y_pred_svm = svm.predict(X_test)

# Second column of predict_proba: probability of class 1 (y holds 0/1 labels).
y_proba_svm = svm.predict_proba(X_test)[:, 1]

# Per-class precision / recall / F1 on the held-out test set.
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred_svm))

# confusion_matrix returns rows = true labels and columns = predicted labels.
# labels=[0, 1] pins the matrix to 2x2 in that order, so it always matches
# the two display names below even if a class is absent from y_test / y_pred_svm.
labels = ['unpopular', 'popular']
cm = confusion_matrix(y_test, y_pred_svm, labels=[0, 1])
cm_df = pd.DataFrame(cm, index=labels, columns=labels)

plt.figure(figsize=(6,4))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', linewidths=0.5, linecolor='black')
plt.title('SVM — Confusion Matrix')
# The heatmap draws DataFrame columns along x and the index along y, so
# x is the predicted class and y is the actual class.
plt.xlabel('Predicted Class')
plt.ylabel('Actual Class')
plt.show()

# ROC curve points and the area under the curve, both from the predicted
# probability of class 1. The AUC is computed once and reused for the
# legend and the final printout.
fpr, tpr, thresholds = roc_curve(y_test, y_proba_svm)
roc_auc = roc_auc_score(y_test, y_proba_svm)

# Plot the curve together with the diagonal of a random classifier.
plt.figure(figsize=(7, 5))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random classifier')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])  # small headroom so the curve is not clipped at TPR = 1
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve — SVM')
plt.legend(loc="lower right")
plt.grid(alpha=0.3)
plt.show()

print("ROC-AUC:", roc_auc)