**Import Libraries**

In [1]:
# Import necessary libraries
import pandas as pd  # Pandas for data manipulation and analysis
from sklearn.preprocessing import LabelEncoder  # LabelEncoder for encoding categorical target labels
from sklearn.feature_extraction.text import TfidfVectorizer  # TfidfVectorizer for converting text data to TF-IDF features
from sklearn.svm import SVC  # SVC (Support Vector Classifier) for SVM classification
from sklearn.metrics import accuracy_score, top_k_accuracy_score  # Metrics for evaluating model performance
from sklearn.model_selection import StratifiedShuffleSplit  # StratifiedShuffleSplit for train-test splitting
import joblib  # Joblib for saving and loading models
from sklearn.decomposition import TruncatedSVD  # TruncatedSVD for dimensionality reduction using Singular Value Decomposition (SVD)
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

**Load The Dataset**

In [2]:
# Attach Google Drive to the Colab runtime so the dataset can be read from it
# and outputs can be written back to it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# Location of the already-preprocessed bug-report CSV on the mounted Drive.
file_path = '/content/drive/My Drive/kaggle_dataset_after_preprocessing.csv'

# Read the whole CSV into a DataFrame; later cells use it as `dataset`.
dataset = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# Quick visual check of the loaded data.
# A bare expression on the last line of a cell makes the notebook render the
# DataFrame (column names, index, and a truncated view of the rows).
dataset

Unnamed: 0,Summary_Stemmed,processed_summary,Assignee
0,"['scroll', 'scroll', 'mice', 'touchpad', 'etc'...",scroll scroll mice touchpad etc scroll,amit@chromium.org
1,"['add', 'check', 'item', 'download', 'panel', ...",add check item download panel browser test,achuith@chromium.org
2,"['useafterfre', 'navig', 'document', 'form', '...",useafterfre navig document form valid messag s...,tkent@chromium.org
3,"['add', 'address', 'properli', 'autofil', 'opt...",add address properli autofil option dialog box,sky@chromium.org
4,"['libxmlgyp', 'defin', 'libxmlstat', 'direct',...",libxmlgyp defin libxmlstat direct depend,wtc@chromium.org
...,...,...,...
33446,"['onaddstream', 'fire', 'second', 'stream', 's...",onaddstream fire second stream sdp contain ams...,deadbeef@chromium.org
33447,"['default', 'toolchain', 'clang', 'linux', 'ca...",default toolchain clang linux caus build failu...,vivek...@samsung.com
33448,"['scatter', 'chart', 'updat', 'modifi', 'valu'...",scatter chart updat modifi valu refer cell,amol.w...@synerzip.com
33449,"['consid', 'use', '1em', 'margin', 'list', 'it...",consid use 1em margin list item identifi speci...,glebl@chromium.org


In [5]:
def leave_out_bugs(df, leave_out_ratio=0.2, random_state=42):
    """Hold out a fraction of every developer's bugs for evaluation.

    The frame is grouped by the 'Assignee' column and each group is split
    independently, so every developer with at least two bugs contributes rows
    to both the training and the left-out set.

    Parameters
    ----------
    df : pandas.DataFrame
        Bug reports; must contain an 'Assignee' column.
    leave_out_ratio : float, default 0.2
        Passed to ``train_test_split`` as ``test_size`` for every group.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    (train_df, test_df) : tuple of pandas.DataFrame
        Concatenation of the per-developer training parts and left-out parts.
        Either frame may be empty (with the same columns as ``df``).
    """
    train_dfs = []
    test_dfs = []
    for _, group in df.groupby('Assignee'):
        if len(group) < 2:
            # A single bug cannot be split: train_test_split would raise
            # because the training part would be empty. Keep it for training.
            train_dfs.append(group)
            continue
        train_group, test_group = train_test_split(
            group, test_size=leave_out_ratio, random_state=random_state
        )
        train_dfs.append(train_group)
        test_dfs.append(test_group)

    # pd.concat([]) raises, so fall back to an empty frame with df's columns.
    empty = df.iloc[0:0]
    train_part = pd.concat(train_dfs) if train_dfs else empty
    test_part = pd.concat(test_dfs) if test_dfs else empty
    return train_part, test_part

# Hold out 20% of each developer's bugs; the remainder is the training set.
train_df, test_df = leave_out_bugs(dataset, 0.2)

In [6]:
# Text feature is the 'processed_summary' column; the label is the assignee.
X_train, y_train = train_df['processed_summary'], train_df['Assignee']
X_test, y_test = test_df['processed_summary'], test_df['Assignee']

In [43]:
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

# Step 1: unigram + bigram TF-IDF features, fitted on the training summaries.
tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 2))
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)

# Step 2: cluster the training bugs. The cluster count is a tunable choice.
num_clusters = 200
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(X_train_tfidf)

# Cluster id of every training bug under the fitted model.
cluster_labels_train = kmeans.predict(X_train_tfidf)



In [27]:
# Transform the left-out (test) data.
# Only transform() is called, so the vocabulary and IDF weights are whatever
# tfidf_vectorizer learned when it was fitted; the test text does not alter them.
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [44]:
# Predict clusters for the left-out bugs: each test bug is given the id of a
# cluster from the already-fitted kmeans model (no refitting happens here).
cluster_labels_test = kmeans.predict(X_test_tfidf)

In [40]:
from collections import Counter, defaultdict

# Collect, for every cluster, the developers of the training bugs it contains.
cluster_to_developers = defaultdict(list)
for cluster_label, developer in zip(cluster_labels_train, y_train):
    cluster_to_developers[cluster_label].append(developer)

# Pick the most frequent developer per cluster.
# Counter counts in a single pass (the previous max(set(x), key=x.count) rescanned
# the list once per distinct developer) and breaks ties by first occurrence,
# so the result no longer depends on set iteration order of strings.
majority_developer_in_cluster = {
    cluster_label: Counter(developers).most_common(1)[0][0]
    for cluster_label, developers in cluster_to_developers.items()
}

# Score the left-out bugs: a bug counts as correct when its true developer is
# the majority developer of the cluster it was assigned to. Clusters that had
# no training bugs map to None via .get() and therefore never match.
correct_assignments = 0
total_assignments = len(cluster_labels_test)

for cluster_label, developer in zip(cluster_labels_test, y_test):
    if developer == majority_developer_in_cluster.get(cluster_label):
        correct_assignments += 1

# Avoid ZeroDivisionError when there are no left-out bugs.
accuracy = correct_assignments / total_assignments if total_assignments else 0.0

print(f"Accuracy of assigning left-out bugs: {accuracy:.2f}")

Accuracy of assigning left-out bugs: 0.05


In [41]:
from collections import Counter, defaultdict

# Collect, for every developer, the cluster ids of their training bugs.
developer_to_clusters = defaultdict(list)
for developer, cluster_label in zip(y_train, cluster_labels_train):
    developer_to_clusters[developer].append(cluster_label)

# Pick the most frequent cluster per developer.
# Counter counts in a single pass (the previous max(set(x), key=x.count) rescanned
# the list once per distinct cluster); ties go to the cluster seen first.
majority_cluster_for_developer = {
    developer: Counter(clusters).most_common(1)[0][0]
    for developer, clusters in developer_to_clusters.items()
}

# Score the left-out bugs: a bug counts as correct when it lands in the
# majority cluster of its true developer. Developers absent from the training
# data can never match.
correct_assignments = 0
total_assignments = len(cluster_labels_test)

for cluster_label, developer in zip(cluster_labels_test, y_test):
    if developer in majority_cluster_for_developer and cluster_label == majority_cluster_for_developer[developer]:
        correct_assignments += 1

# Avoid ZeroDivisionError when there are no left-out bugs.
accuracy = correct_assignments / total_assignments if total_assignments else 0.0

print(f"Accuracy of assigning left-out bugs: {accuracy:.2f}")

Accuracy of assigning left-out bugs: 0.22


In [24]:
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, homogeneity_score, completeness_score, v_measure_score

In [45]:
# Ground-truth developer of every bug. Note that the "validation" labels are
# taken from test_df, i.e. the left-out bugs.
true_labels_train = train_df['Assignee'].values
true_labels_val = test_df['Assignee'].values


def _clustering_scores(true_labels, cluster_labels):
    """Return (ARI, NMI, homogeneity, completeness, V-measure) for one labelling."""
    metrics = (
        adjusted_rand_score,
        normalized_mutual_info_score,
        homogeneity_score,
        completeness_score,
        v_measure_score,
    )
    return tuple(metric(true_labels, cluster_labels) for metric in metrics)


def _print_scores(heading, scores):
    """Print a heading followed by one 'name: value' line per score."""
    names = (
        "Adjusted Rand Index",
        "Normalized Mutual Information",
        "Homogeneity",
        "Completeness",
        "V-measure",
    )
    print(heading)
    for name, value in zip(names, scores):
        print(f"{name}: {value}")


# Agreement between developers and clusters on the training bugs.
(ari_train, nmi_train, homogeneity_train,
 completeness_train, v_measure_train) = _clustering_scores(true_labels_train, cluster_labels_train)
_print_scores("Training set evaluation:",
              (ari_train, nmi_train, homogeneity_train, completeness_train, v_measure_train))

# Same metrics on the left-out bugs.
(ari_val, nmi_val, homogeneity_val,
 completeness_val, v_measure_val) = _clustering_scores(true_labels_val, cluster_labels_test)
_print_scores("Validation set evaluation:",
              (ari_val, nmi_val, homogeneity_val, completeness_val, v_measure_val))

Training set evaluation:
Adjusted Rand Index: 0.01085768696902282
Normalized Mutual Information: 0.38075345293706814
Homogeneity: 0.3352502440076427
Completeness: 0.4405487288918974
V-measure: 0.38075345293706825
Validation set evaluation:
Adjusted Rand Index: 0.01036117981469603
Normalized Mutual Information: 0.5181144162419392
Homogeneity: 0.45274150313976985
Completeness: 0.6055521940614514
V-measure: 0.5181144162419391


**Encode the labels**



In [None]:
# Turn the assignee strings into integer class ids for the classifiers below.
# LabelEncoder numbers the distinct assignees in sorted (lexicographical) order:
# the first one becomes 0, the next 1, and so on.
label_encoder = LabelEncoder()
label_encoder.fit(dataset['Assignee'])

# Store the ids next to the original names in a new 'Assignee_Class' column.
dataset['Assignee_Class'] = label_encoder.transform(dataset['Assignee'])

In [None]:
# Visual check after labeling: the rendered frame should now include the
# 'Assignee_Class' column added by the label-encoding cell.
# A bare expression on the last line makes the notebook render the DataFrame.
dataset

Unnamed: 0,Summary_Stemmed,processed_summary,Assignee,Assignee_Class
0,"['scroll', 'scroll', 'mice', 'touchpad', 'etc'...",scroll scroll mice touchpad etc scroll,amit@chromium.org,124
1,"['add', 'check', 'item', 'download', 'panel', ...",add check item download panel browser test,achuith@chromium.org,52
2,"['useafterfre', 'navig', 'document', 'form', '...",useafterfre navig document form valid messag s...,tkent@chromium.org,2118
3,"['add', 'address', 'properli', 'autofil', 'opt...",add address properli autofil option dialog box,sky@chromium.org,1972
4,"['libxmlgyp', 'defin', 'libxmlstat', 'direct',...",libxmlgyp defin libxmlstat direct depend,wtc@chromium.org,2282
...,...,...,...,...
117376,"['updat', 'gleanj', 'dashboard', 'ignor', 'gle...",updat gleanj dashboard ignor glean sdk data vpn,brosa,278
117377,"['autocomplet', 'type', 'valid', 'valu', 'pass...",autocomplet type valid valu pass record,brosa,278
117378,"['intermitt', 'slow', 'see', 'ping', 'show', '...",intermitt slow see ping show debug ping viewer,brosa,278
117379,"['investig', 'string', 'metric', 'type', 'adeq...",investig string metric type adequ captur gfxad...,pmcmanis,1685


In [None]:
# Display the number of unique values in each column of the dataset.
# 'Assignee' and 'Assignee_Class' should report the same count, since the
# class id is a one-to-one encoding of the assignee name.
dataset.nunique()

Summary_Stemmed      117103
processed_summary    117103
Assignee               2370
Assignee_Class         2370
dtype: int64

In [None]:
# Dimensions of the dataset as (rows, columns).
print(dataset.shape)

# Number of bugs per encoded assignee, most frequent first; a quick way to see
# how imbalanced the classes are.
class_counts = dataset['Assignee_Class'].value_counts()
print(class_counts)

(117381, 4)
Assignee_Class
1014    2478
1408    2412
1009    1467
1643    1377
1013    1162
        ... 
1704       5
607        5
899        5
351        5
947        5
Name: count, Length: 2370, dtype: int64


**Split the dataset**

In [None]:
# Single stratified shuffle split: 20% of the rows go to the test set and the
# fixed seed makes the split reproducible.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# Stratify on 'Assignee_Class' so both parts keep a similar class distribution.
# split() yields (train positions, test positions); n_splits=1 gives one pair.
split_iter = sss.split(dataset, dataset['Assignee_Class'])
train_idx, test_idx = next(split_iter)

# Materialise both parts by position and give each a fresh 0..n-1 index.
train_df, test_df = (
    dataset.iloc[rows].reset_index(drop=True) for rows in (train_idx, test_idx)
)

In [None]:
# Second stratified split: carve a validation set (20%) out of the training
# portion, again with a fixed seed for reproducibility.
sss_val = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# n_splits=1 yields exactly one (train, validation) pair of positions, so take
# it with next() - the same pattern used for the train/test split - instead of
# a for-loop followed by duplicate assignments that relied on the loop
# variables leaking out of the loop.
# Stratifying on 'Assignee_Class' keeps the class distribution similar in both parts.
train_idx, val_idx = next(sss_val.split(train_df, train_df['Assignee_Class']))

# Final training DataFrame: rows selected by position, index reset to 0..n-1.
final_train_df = train_df.iloc[train_idx].reset_index(drop=True)

# Validation DataFrame: rows selected by position, index reset to 0..n-1.
val_df = train_df.iloc[val_idx].reset_index(drop=True)

In [None]:
# Output the shapes of the resulting DataFrames (train, validation, test).
# `.shape` was previously misspelled as `.spe`, which raises AttributeError.
print(final_train_df.shape)
print(val_df.shape)
print(test_df.shape)

(75123, 4)
(18781, 4)
(23477, 4)


In [None]:
# Features are the preprocessed summaries; labels are the encoded assignees.
X_train = final_train_df['processed_summary']
y_train = final_train_df['Assignee_Class']

X_val = val_df['processed_summary']
y_val = val_df['Assignee_Class']

X_test = test_df['processed_summary']
y_test = test_df['Assignee_Class']

**Apply TF-IDF Transformation**

In [None]:
# Explanatory notes are written as `#` comments: a bare triple-quoted string is
# an expression statement, and when it is the last one in a cell the notebook
# echoes it as the cell's output.

# Initialize the TF-IDF vectorizer.
# ngram_range=(1, 2): extract unigrams and bigrams.
tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 2))

# Fit the vectorizer on the training data and transform it.
# X_train_tfidf: TF-IDF matrix where rows are documents and columns are features.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)

# Transform the validation data with the vectorizer fitted on the training data.
X_val_tfidf = tfidf_vectorizer.transform(X_val)

# Transform the test data with the vectorizer fitted on the training data.
X_test_tfidf = tfidf_vectorizer.transform(X_test)

'\nPurpose:\n- Transforms the test data using the same TF-IDF vectorizer fitted on the training data.\n\nOutputs:\n- X_test_tfidf: TF-IDF matrix for test data using the fitted TF-IDF vectorizer.\n'

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Within-cluster sum of squares (KMeans inertia) for 1..max_clusters clusters.
# One full KMeans fit per candidate value, so this cell is expensive.
max_clusters = 100
cluster_counts = range(1, max_clusters + 1)
wcss = []
for i in cluster_counts:
    kmeans = KMeans(n_clusters=i, random_state=42)
    wcss.append(kmeans.fit(X_train_tfidf).inertia_)

# Elbow curve: look for the point where adding clusters stops paying off.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cluster_counts, wcss, marker='o')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('WCSS')
ax.set_title('Elbow Method for Optimal Number of Clusters')
plt.show()



In [None]:
# Step 2: cluster the training bugs in TF-IDF space.
# The number of clusters is a tunable choice.
num_clusters = 50
kmeans = KMeans(random_state=42, n_clusters=num_clusters)
kmeans.fit(X=X_train_tfidf)



In [None]:
# Cluster id of every training bug under the fitted K-Means model.
cluster_labels_train = kmeans.predict(X_train_tfidf)

# For each cluster id, the distinct assignees (in order of first appearance)
# of the training bugs that fall into it; an empty list if the cluster is empty.
cluster_developers = {
    cluster_id: final_train_df.iloc[cluster_labels_train == cluster_id]['Assignee'].unique().tolist()
    for cluster_id in range(num_clusters)
}

**Apply SVD**

In [None]:
# Explanatory notes are written as `#` comments: a bare triple-quoted string is
# an expression statement, and when it is the last one in a cell the notebook
# echoes it as the cell's output.

# Apply SVD to reduce the dimensionality of the TF-IDF vectors.
# n_components=1100: number of components (dimensions) kept after reduction.
# random_state=42: fixed seed for reproducible results.
svd = TruncatedSVD(n_components=1100, random_state=42)

# Fit the SVD on the training TF-IDF data and project it to the reduced space.
X_train_svd = svd.fit_transform(X_train_tfidf)

# Project the validation data with the SVD fitted on the training data.
X_val_svd = svd.transform(X_val_tfidf)

# Project the test data with the SVD fitted on the training data.
X_test_svd = svd.transform(X_test_tfidf)

# NOTE(review): the SVM cells shown in this notebook fit and predict on the
# *_tfidf matrices, not on these *_svd matrices - confirm whether the reduced
# features are meant to be used.

**Train an SVM Classifier**

In [None]:
# Linear-kernel SVM with C=10; probability=True enables predict_proba.
svm_classifier = SVC(kernel='linear', C=10, probability=True)

# Fit on the TF-IDF training features. The target here is the K-Means cluster
# id of each training bug (cluster_labels_train), not the assignee class.
svm_classifier.fit(X=X_train_tfidf, y=cluster_labels_train)


In [None]:
from collections import Counter, defaultdict

from sklearn.metrics import accuracy_score, classification_report

# Step 5: Predict clusters for the validation set and evaluate.
predicted_clusters_val = svm_classifier.predict(X_val_tfidf)

# Cluster id of every training bug under the fitted K-Means model.
cluster_labels_train = kmeans.predict(X_train_tfidf)

# Map each assignee to the cluster that holds most of their training bugs.
# Previously a plain dict assignment inside the loop kept only the cluster of
# the assignee's last training bug, which is arbitrary when an assignee's bugs
# are spread over several clusters. Ties go to the cluster seen first.
clusters_per_assignee = defaultdict(Counter)
for assignee, cluster_id in zip(final_train_df['Assignee'], cluster_labels_train):
    clusters_per_assignee[assignee][cluster_id] += 1

assignee_to_cluster = {
    assignee: counts.most_common(1)[0][0]
    for assignee, counts in clusters_per_assignee.items()
}

# Evaluate against the expected clusters for the validation set.
# Raises KeyError if a validation assignee never appears in the training data.
actual_clusters_val = [assignee_to_cluster[assignee] for assignee in val_df['Assignee']]
accuracy = accuracy_score(actual_clusters_val, predicted_clusters_val)
print(f"Validation Accuracy: {accuracy:.2f}")

# Classification report (optional)
print(classification_report(actual_clusters_val, predicted_clusters_val))

**Evaluate the Classifier on the Validation Set**

In [None]:
# Explanatory notes are written as `#` comments: a bare triple-quoted string is
# an expression statement, and when it is the last one in a cell the notebook
# echoes it as the cell's output.

# Predict labels on the validation set from its TF-IDF features using the
# current svm_classifier.
val_predictions = svm_classifier.predict(X_val_tfidf)

# Accuracy on the validation set: fraction of predictions equal to y_val.
# NOTE(review): y_val holds 'Assignee_Class' ids, while the training cell in
# this notebook fits svm_classifier on K-Means cluster ids. Unless a classifier
# trained on assignee classes is in scope here, the two label spaces differ and
# this score is not meaningful - confirm which model this cell should evaluate.
val_accuracy = accuracy_score(y_val, val_predictions)
print("Validation Accuracy:", val_accuracy)

Validation Accuracy: 0.20664576802507836


In [None]:
# Load a previously saved classifier from Google Drive.
# (Notes are `#` comments; a trailing bare string would be echoed as cell output.)
# NOTE(security): joblib.load is pickle-based and can execute arbitrary code
# while loading - only load files from a trusted source.
# NOTE(review): this rebinds svm_classifier, replacing any classifier fitted
# earlier in the notebook; the file name suggests an SVM saved with
# probability estimates enabled - confirm it matches the current features/labels.
model_filename = '/content/drive/MyDrive/checkpoints/svm_classifier_model_with_probability.joblib'
svm_classifier = joblib.load(model_filename)

In [None]:
# Predict probabilities on the validation set.
# (Notes are `#` comments; a trailing bare string would be echoed as cell output.)
# val_probabilities: one row per validation sample, one column per class known
# to svm_classifier, computed from the TF-IDF validation features.
val_probabilities = svm_classifier.predict_proba(X_val_tfidf)

In [None]:
# Calculate top-3 accuracy: a validation sample counts as correct when its true
# label is among the 3 classes with the highest predicted probability.
# (Notes are `#` comments; a trailing bare string would be echoed as cell output.)
# NOTE(review): no `labels=` is passed, so the columns of val_probabilities are
# matched against the sorted labels present in y_val - confirm y_val covers
# exactly the classes the model was trained on.
top_k = 3
val_top_k_accuracy = top_k_accuracy_score(y_val, val_probabilities, k=top_k)

# Report the score.
print(f"Validation Top-{top_k} Accuracy:", val_top_k_accuracy)

Validation Top-3 Accuracy: 0.40301368404238325
Validation Top-3 Accuracy: 0.40301368404238325
Validation Top-3 Accuracy: 0.40301368404238325


In [None]:
# Calculate top-5 accuracy: a validation sample counts as correct when its true
# label is among the 5 classes with the highest predicted probability.
# (Notes are `#` comments; a trailing bare string would be echoed as cell output.)
# NOTE(review): no `labels=` is passed, so the columns of val_probabilities are
# matched against the sorted labels present in y_val - confirm y_val covers
# exactly the classes the model was trained on.
top_k = 5
val_top_k_accuracy = top_k_accuracy_score(y_val, val_probabilities, k=top_k)

# Report the score.
print(f"Validation Top-{top_k} Accuracy:", val_top_k_accuracy)

Validation Top-5 Accuracy: 0.469144348011288
Validation Top-5 Accuracy: 0.469144348011288
Validation Top-5 Accuracy: 0.469144348011288


In [None]:
# Calculate top-10 accuracy: a validation sample counts as correct when its true
# label is among the 10 classes with the highest predicted probability.
# (Notes are `#` comments; a trailing bare string would be echoed as cell output.)
# NOTE(review): no `labels=` is passed, so the columns of val_probabilities are
# matched against the sorted labels present in y_val - confirm y_val covers
# exactly the classes the model was trained on.
top_k = 10
val_top_k_accuracy = top_k_accuracy_score(y_val, val_probabilities, k=top_k)

# Report the score.
print(f"Validation Top-{top_k} Accuracy:", val_top_k_accuracy)

Validation Top-10 Accuracy: 0.5530056972472179
Validation Top-10 Accuracy: 0.5530056972472179
Validation Top-10 Accuracy: 0.5530056972472179


In [None]:
# Calculate top-50 accuracy: a validation sample counts as correct when its true
# label is among the 50 classes with the highest predicted probability.
# (Notes are `#` comments; a trailing bare string would be echoed as cell output.)
# NOTE(review): no `labels=` is passed, so the columns of val_probabilities are
# matched against the sorted labels present in y_val - confirm y_val covers
# exactly the classes the model was trained on.
top_k = 50
val_top_k_accuracy = top_k_accuracy_score(y_val, val_probabilities, k=top_k)

# Report the score.
print(f"Validation Top-{top_k} Accuracy:", val_top_k_accuracy)

Validation Top-50 Accuracy: 0.7214738299345083
