In [74]:
import argparse
import numpy as np
import sklearn.metrics.pairwise as pairwise

def read_data(filepath):
    Z = np.loadtxt(filepath)
    y = np.array(Z[:, 0], dtype=int)  # labels are in the first column
    X = np.array(Z[:, 1:], dtype=float)  # data is in all the others
    return [X, y]

def save_data(filepath, Y):
    np.savetxt(filepath, Y, fmt="%d")

# Read in the variables needed.
outfile = "output.npy"  # File where output (predictions) will be written.
d = 0.95  # Damping factor d in the MRW equation.
k = 1  # Number of (labeled) seeds to use per class.
t = "degree"  # Strategy for choosing seeds.
gamma = 0.5  # Gamma parameter in the RBF kernel
epsilon = 0.01  # Convergence threshold in the MRW iteration.

infile = "Z_easy.txt"
# Read in the data.
X, y = read_data(infile)

# Create affinity matrix via RBF kernel from sklearn using X and gamma
A = pairwise.rbf_kernel(X, gamma=gamma)

# Calculate the diagonal degree matrix D from the sum of rows in A (subtract epsilon)
D = np.diag(np.sum(A, axis=1) + epsilon)

# Find weighted transition probability matrix W
W = np.zeros(A.shape)
#Fill in W using Aij/Dij formula
for i in range(len(X)):
    for j in range(len(X)):
        if D[i, i] != 0:
            W[i, j] = A[i, j] / D[i, i]

# Create seed vectors (must be able to use RANDOM and DEGREE seed selection)
# desired strategy defined in t variable
# Initialize seed vectors list
seeds = []
#seed indices:
seed_indices = []
# Pull class labels from y matrix, ignore -1 "label"
unique_labels = [label for label in np.unique(y) if label != -1]

# Loop through each label and select seeds either randomly or by degree to be used in MRW
if t == "random":
    for label in unique_labels:
        # Pull indices for corresponding y label
        indices = [i for i, y_label in enumerate(y) if y_label == label]
        # Pick random seed
        class_seed_index = np.random.choice(indices, k)
        # Add to seeds list
        seeds.append(X[class_seed_index])
        # Track seed vector indices
        seed_indices.append(class_seed_index)
# Loop through class labels and select seeds by highest degree
elif t == "degree":
    for label in unique_labels:
        # Pull indices for corresponding label
        indices = [i for i, y_label in enumerate(y) if y_label == label]
        # Define class seeds
        class_seeds = X[indices]
        # Pull rows in A corresponding to the selected indices
        selected_A_rows = A[indices]
        # Calculate degrees (sum of rows) for the selected A rows
        degrees = np.sum(selected_A_rows, axis=1)
        # Create a list of tuples pairing degrees with original indices to keep track of degree-index pair
        degrees_with_indices = [(degree, index) for degree, index in zip(degrees, indices)]
        # Sort degrees_with_indices in descending order based on degree values
        degrees_with_indices.sort(reverse=True)
        # Extract the highest degree and its corresponding index in X
        highest_degree, highest_degree_index = degrees_with_indices[0]
        # Use the corresponding index in X to select the row with the highest degree
        highest_degree_seed = X[highest_degree_index]
        # Add the highest degree seed to the seeds list
        seeds.append(highest_degree_seed)
        # Track seed indices
        seed_indices.append(highest_degree_index)

#Initialize ranking vectors
ranking_vectors = []
#Perform iteration through each class
for label in unique_labels:
    # Create U seed vector with a length equal to rows in X for each class
    U = np.zeros(len(X))
    #Set correspoinding U elements to 1 for specific class
    for i in range(len(y)):
        if y[i] == label:
            U[i] = 1
    # Normalize U so that sum of terms is 1
    normalized_U = U / np.sum(U)
    # Start MRW iterations for R until epsilon convergence threshold is met
    # print(normalized_U)
    # Perform MRW iterations until convergence based on epsilon
    converged = False
    #Initialize ranking vector R (anything other than 0s)
    R = np.ones(len(X))  
    #Loop
    while not converged:
        # Perform MRW iteration
        R_new = (1-d) * normalized_U + np.dot(W,R) 
        # Check for convergence based on squared difference
        squared_diff = np.sum((R - R_new) ** 2)
        if squared_diff < epsilon:
            converged = True
            # Append the final ranking vector to the list
            ranking_vectors.append(R_new)
            break
        else:
            #Update R if  threshold not met
            R = R_new


# # Generate prediction labels for unlabeled data
for i in range(len(y)):
    if y[i] == -1:
        print(i)
        predicted_label = np.argmax([ranking_vectors[0][i],ranking_vectors[1][i],ranking_vectors[2][i]])
        y[i] = predicted_label


print(ranking_vectors[0][52])
print(ranking_vectors[1][52])
print(ranking_vectors[2][52])
print(ranking_vectors[0])
print(ranking_vectors[1])
print(ranking_vectors[2])
# #Save output to "output.npy"
save_data(outfile, y)

52
59
60
85
86
96
0.9987897441217721
0.9987897441217721
0.9987897441217721
