In [14]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.multiclass import OneVsOneClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Вхідний файл, який містить дані
input_file = 'income_data.txt'

In [3]:
# Читання даних
X = []
y = []
count_class1 = 0
count_class2 = 0
max_datapoints = 25000


In [6]:
with open(input_file, 'r') as f:
  for line in f:
    if count_class1 >= max_datapoints and count_class2 >= max_datapoints:
      break
    if '?' in line:
      continue

    data = line.strip().split(',')

    if data[-1].strip() == '<=50K' and count_class1 < max_datapoints:
      X.append(data)
      count_class1 += 1
    elif data[-1].strip() == '>50K' and count_class2 < max_datapoints:
      X.append(data)
      count_class2 += 1

In [7]:
# Перетворення на масив numpy
X = np.array(X)

# Перетворення рядкових даних на числові
label_encoder = []
X_encoded = np.empty(X.shape)

for i,item in enumerate(X[1]):
 if item.isdigit():
  X_encoded[:, i] = X[:, i]
 else:
  label_encoder.append(preprocessing.LabelEncoder())
  X_encoded[:, i] = label_encoder[-1].fit_transform(X[:,i])
X = X_encoded[:, :-1].astype(int)
y = X_encoded[:, -1].astype(int)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
results = {}

In [11]:
# Поліноміальне ядро
poly_classifier = SVC(kernel='poly', degree=3, random_state=0)
poly_classifier.fit(X_train, y_train)
y_pred_poly = poly_classifier.predict(X_test)

In [15]:
results['Polynomial Kernel'] = {
    'Accuracy': accuracy_score(y_test, y_pred_poly),
    'Precision': precision_score(y_test, y_pred_poly, average='weighted'),
    'Recall': recall_score(y_test, y_pred_poly, average='weighted'),
    'F1-score': f1_score(y_test, y_pred_poly, average='weighted')
}

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
rbf_classifier = SVC(kernel='rbf', random_state=0)
rbf_classifier.fit(X_train, y_train)
y_pred_rbf = rbf_classifier.predict(X_test)

In [17]:
results['RBF Kernel'] = {
    'Accuracy': accuracy_score(y_test, y_pred_rbf),
    'Precision': precision_score(y_test, y_pred_rbf, average='weighted'),
    'Recall': recall_score(y_test, y_pred_rbf, average='weighted'),
    'F1-score': f1_score(y_test, y_pred_rbf, average='weighted')
}

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
sigmoid_classifier = SVC(kernel='sigmoid', random_state=0)
sigmoid_classifier.fit(X_train, y_train)
y_pred_sigmoid = sigmoid_classifier.predict(X_test)

In [19]:
results['Sigmoid Kernel'] = {
    'Accuracy': accuracy_score(y_test, y_pred_sigmoid),
    'Precision': precision_score(y_test, y_pred_sigmoid, average='weighted'),
    'Recall': recall_score(y_test, y_pred_sigmoid, average='weighted'),
    'F1-score': f1_score(y_test, y_pred_sigmoid, average='weighted')
}

In [20]:
for kernel, metrics in results.items():
    print(f"Results for {kernel}:")
    print(f"Accuracy: {metrics['Accuracy']:.2f}")
    print(f"Precision: {metrics['Precision']:.2f}")
    print(f"Recall: {metrics['Recall']:.2f}")
    print(f"F1-score: {metrics['F1-score']:.2f}\n")

Results for Polynomial Kernel:
Accuracy: 0.74
Precision: 0.55
Recall: 0.74
F1-score: 0.64

Results for RBF Kernel:
Accuracy: 0.74
Precision: 0.55
Recall: 0.74
F1-score: 0.64

Results for Sigmoid Kernel:
Accuracy: 0.61
Precision: 0.61
Recall: 0.61
F1-score: 0.61

