In [None]:
import sys

import joblib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_recall_fscore_support,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.utils import shuffle

# Report the TensorFlow build configuration, the GPUs TensorFlow can see, and the
# Python / TensorFlow versions, so the run environment is recorded with the results.
info = tf.sysconfig.get_build_info()
print("Built against CUDA:",  info.get("cuda_version"))
print("Built against cuDNN:", info.get("cudnn_version"))
print("GPUs found: ",         tf.config.list_physical_devices("GPU"))

print("Python:", sys.version.split()[0])
print("TF:",     tf.__version__)

# Load the preprocessed dataset bundle (a mapping with the keys read below).
# NOTE(review): joblib.load unpickles arbitrary objects and can execute code
# embedded in the file — only load bundles produced by a trusted pipeline.
prepared_data_path = '../Experiment/PreprocessedData/AmazonPrepared.joblib'
prepared_data = joblib.load(prepared_data_path)

# Feature matrix, cast to float32. The names suggest a sparse TF-IDF matrix — TODO confirm.
X_tfidf = prepared_data['X_tfidf'].astype(np.float32)
# Target labels; the .value_counts() call below implies a pandas Series.
y = prepared_data['y']

# Presumably the TF-IDF vocabulary and the fitted vectorizer (verify against the
# preprocessing notebook); neither is used in this cell.
feature_names = prepared_data['feature_names']
vectorizer = prepared_data['vectorizer']

# Show how many samples each class has.
print(f"\n{y.value_counts()}")

# Hold out 15% of the samples as the test set. The split is stratified on y and
# uses a fixed seed, so every model below is evaluated on the same test rows.
X_train_sparse, X_test_sparse, y_train, y_test = train_test_split(
    X_tfidf, y, test_size=0.15, random_state=42, stratify=y
)

In [None]:
# Repeated-run evaluation of logistic regression on the fixed train/test split.
# The estimator seed is constant (random_state=42), so the only thing that changes
# between the ten runs is the row order of the training set (shuffle seeded with
# the run index).
# The result lists are re-created here so that re-running the cell starts clean.
lr_accuracies = []         # one accuracy value per run
lr_precisions_class = []   # one per-class array per run, ordered [CG, OR]
lr_recalls_class = []      # same layout as lr_precisions_class
lr_f1_scores_class = []    # same layout as lr_precisions_class

for i in range(10):
    X_train_shuffled, y_train_shuffled = shuffle(X_train_sparse, y_train, random_state=i)

    lr_classifier = LogisticRegression(
        solver='saga',
        random_state=42,
        max_iter=1000
    )
    lr_classifier.fit(X_train_shuffled, y_train_shuffled)
    y_pred_lr_run = lr_classifier.predict(X_test_sparse)

    # A single call returns per-class precision, recall and F1 (beta defaults to 1),
    # replacing three separate metric passes over the same predictions. The trailing
    # per-class support array is not needed and is sliced off.
    run_precision, run_recall, run_f1 = precision_recall_fscore_support(
        y_test, y_pred_lr_run, labels=['CG', 'OR'], average=None, zero_division=0
    )[:3]

    lr_accuracies.append(accuracy_score(y_test, y_pred_lr_run))
    lr_precisions_class.append(run_precision)
    lr_recalls_class.append(run_recall)
    lr_f1_scores_class.append(run_f1)

In [None]:
# Aggregate the logistic-regression runs. Each *_class list holds one per-class
# array per run, so stacking gives a (runs, classes) array and reducing over
# axis 0 yields one mean / standard deviation per class.
lr_precision_runs = np.asarray(lr_precisions_class)
lr_recall_runs = np.asarray(lr_recalls_class)
lr_f1_runs = np.asarray(lr_f1_scores_class)

avg_lr_precisions, std_lr_precisions = lr_precision_runs.mean(axis=0), lr_precision_runs.std(axis=0)
avg_lr_recalls, std_lr_recalls = lr_recall_runs.mean(axis=0), lr_recall_runs.std(axis=0)
avg_lr_f1_scores, std_lr_f1_scores = lr_f1_runs.mean(axis=0), lr_f1_runs.std(axis=0)

# Report the mean of each metric. Index 0 is class CG and index 1 is class OR,
# following the labels order used when the metrics were collected.
# Output labels are Lithuanian: Preciziškumas = precision, Atkūrimas = recall,
# F1-statistikos reikšmė = F1 score.
print(
    f"Accuracy:  {np.mean(lr_accuracies):.4f}",
    f"Preciziškumas (CG): {avg_lr_precisions[0]:.4f}",
    f"Preciziškumas (OR):    {avg_lr_precisions[1]:.4f}",
    f"Atkūrimas (CG):    {avg_lr_recalls[0]:.4f}",
    f"Atkūrimas (OR):       {avg_lr_recalls[1]:.4f}",
    f"F1-statistikos reikšmė (CG):  {avg_lr_f1_scores[0]:.4f}",
    f"F1-statistikos reikšmė (OR):     {avg_lr_f1_scores[1]:.4f}",
    sep="\n",
)

In [None]:
# Repeated-run evaluation of k-nearest-neighbours on the fixed train/test split.
# The classifier is constructed without any seed, so the only thing that changes
# between the ten runs is the row order of the training set (shuffle seeded with
# the run index).
# The result lists are re-created here so that re-running the cell starts clean.
knn_accuracies = []         # one accuracy value per run
knn_precisions_class = []   # one per-class array per run, ordered [CG, OR]
knn_recalls_class = []      # same layout as knn_precisions_class
knn_f1_scores_class = []    # same layout as knn_precisions_class

for i in range(10):
    X_train_shuffled, y_train_shuffled = shuffle(X_train_sparse, y_train, random_state=i)

    knn_classifier = KNeighborsClassifier(
        n_neighbors=7,
        metric='cosine',
        weights='distance'
    )
    knn_classifier.fit(X_train_shuffled, y_train_shuffled)
    y_pred_knn_run = knn_classifier.predict(X_test_sparse)

    # A single call returns per-class precision, recall and F1 (beta defaults to 1),
    # replacing three separate metric passes over the same predictions. The trailing
    # per-class support array is not needed and is sliced off.
    run_precision, run_recall, run_f1 = precision_recall_fscore_support(
        y_test, y_pred_knn_run, labels=['CG', 'OR'], average=None, zero_division=0
    )[:3]

    knn_accuracies.append(accuracy_score(y_test, y_pred_knn_run))
    knn_precisions_class.append(run_precision)
    knn_recalls_class.append(run_recall)
    knn_f1_scores_class.append(run_f1)

In [None]:
# Aggregate the k-nearest-neighbours runs. Each *_class list holds one per-class
# array per run, so stacking gives a (runs, classes) array and reducing over
# axis 0 yields one mean / standard deviation per class.
knn_precision_runs = np.asarray(knn_precisions_class)
knn_recall_runs = np.asarray(knn_recalls_class)
knn_f1_runs = np.asarray(knn_f1_scores_class)

avg_knn_precisions, std_knn_precisions = knn_precision_runs.mean(axis=0), knn_precision_runs.std(axis=0)
avg_knn_recalls, std_knn_recalls = knn_recall_runs.mean(axis=0), knn_recall_runs.std(axis=0)
avg_knn_f1_scores, std_knn_f1_scores = knn_f1_runs.mean(axis=0), knn_f1_runs.std(axis=0)

# Report the mean of each metric. Index 0 is class CG and index 1 is class OR,
# following the labels order used when the metrics were collected.
# Output labels are Lithuanian: Preciziškumas = precision, Atkūrimas = recall,
# F1-statistikos reikšmė = F1 score.
print(
    f"Accuracy:  {np.mean(knn_accuracies):.4f}",
    f"Preciziškumas (CG): {avg_knn_precisions[0]:.4f}",
    f"Preciziškumas (OR):    {avg_knn_precisions[1]:.4f}",
    f"Atkūrimas (CG):    {avg_knn_recalls[0]:.4f}",
    f"Atkūrimas (OR):       {avg_knn_recalls[1]:.4f}",
    f"F1-statistikos reikšmė (CG):  {avg_knn_f1_scores[0]:.4f}",
    f"F1-statistikos reikšmė (OR):     {avg_knn_f1_scores[1]:.4f}",
    sep="\n",
)

In [None]:
# Repeated-run evaluation of a linear SVM on the fixed train/test split.
# The estimator seed is constant (random_state=42), so the only thing that changes
# between the ten runs is the row order of the training set (shuffle seeded with
# the run index).
# The result lists are re-created here so that re-running the cell starts clean.
svm_accuracies = []         # one accuracy value per run
svm_precisions_class = []   # one per-class array per run, ordered [CG, OR]
svm_recalls_class = []      # same layout as svm_precisions_class
svm_f1_scores_class = []    # same layout as svm_precisions_class

for i in range(10):
    X_train_shuffled, y_train_shuffled = shuffle(X_train_sparse, y_train, random_state=i)

    svm_classifier = LinearSVC(
        C=1.0,
        random_state=42,
        max_iter=2000,
        dual="auto"
    )
    svm_classifier.fit(X_train_shuffled, y_train_shuffled)
    y_pred_svm_run = svm_classifier.predict(X_test_sparse)

    # A single call returns per-class precision, recall and F1 (beta defaults to 1),
    # replacing three separate metric passes over the same predictions. The trailing
    # per-class support array is not needed and is sliced off.
    run_precision, run_recall, run_f1 = precision_recall_fscore_support(
        y_test, y_pred_svm_run, labels=['CG', 'OR'], average=None, zero_division=0
    )[:3]

    svm_accuracies.append(accuracy_score(y_test, y_pred_svm_run))
    svm_precisions_class.append(run_precision)
    svm_recalls_class.append(run_recall)
    svm_f1_scores_class.append(run_f1)

In [None]:
# Aggregate the linear-SVM runs. Each *_class list holds one per-class array per
# run, so stacking gives a (runs, classes) array and reducing over axis 0 yields
# one mean / standard deviation per class.
svm_precision_runs = np.asarray(svm_precisions_class)
svm_recall_runs = np.asarray(svm_recalls_class)
svm_f1_runs = np.asarray(svm_f1_scores_class)

avg_svm_precisions, std_svm_precisions = svm_precision_runs.mean(axis=0), svm_precision_runs.std(axis=0)
avg_svm_recalls, std_svm_recalls = svm_recall_runs.mean(axis=0), svm_recall_runs.std(axis=0)
avg_svm_f1_scores, std_svm_f1_scores = svm_f1_runs.mean(axis=0), svm_f1_runs.std(axis=0)

# Report the mean of each metric. Index 0 is class CG and index 1 is class OR,
# following the labels order used when the metrics were collected.
# Output labels are Lithuanian: Preciziškumas = precision, Atkūrimas = recall,
# F1-statistikos reikšmė = F1 score.
print(
    f"Accuracy:  {np.mean(svm_accuracies):.4f}",
    f"Preciziškumas (CG): {avg_svm_precisions[0]:.4f}",
    f"Preciziškumas (OR):    {avg_svm_precisions[1]:.4f}",
    f"Atkūrimas (CG):    {avg_svm_recalls[0]:.4f}",
    f"Atkūrimas (OR):       {avg_svm_recalls[1]:.4f}",
    f"F1-statistikos reikšmė (CG):  {avg_svm_f1_scores[0]:.4f}",
    f"F1-statistikos reikšmė (OR):     {avg_svm_f1_scores[1]:.4f}",
    sep="\n",
)