**Practical 9**

**Aim: Learning to Rank**
*   Implement a learning to rank algorithm (e.g., RankSVM or RankBoost).
*   Train the ranking model using labelled data and evaluate its effectiveness.


In [None]:
print("T114 | Bhumika Shelar")
import numpy as np
from sklearn.svm import LinearSVC
from itertools import combinations

# Step 1: Toy ranking data -- one row of features per document
X = np.array([
    [1, 2],  # doc 1
    [2, 1],  # doc 2
    [3, 3],  # doc 3
    [1, 0],  # doc 4
])

# Graded relevance per document (a larger value should rank higher)
y = np.array([1, 0, 2, 0])

# Step 2: Turn the graded labels into pairwise preferences.
# Every unordered document pair (i < j) whose labels differ contributes the
# feature difference X[i] - X[j], labelled +1 when doc i is the more relevant
# one and -1 otherwise.
X_pairs = []
y_pairs = []

for i, j in combinations(range(len(X)), 2):
    if y[i] == y[j]:
        # Equal relevance expresses no preference, so the pair is skipped.
        continue
    X_pairs.append(X[i] - X[j])
    y_pairs.append(1 if y[i] > y[j] else -1)

X_pairs = np.array(X_pairs)
y_pairs = np.array(y_pairs)

# Step 3: Train RankSVM -- a linear SVM fitted on the pairwise differences
model = LinearSVC().fit(X_pairs, y_pairs)

# Step 4: Score the original documents with the fitted model
scores = model.decision_function(X)

# Step 5: Order document indices by descending score
ranking = np.argsort(-scores)

print("Ranking order (best to worst):", ranking)
print("Ranking scores:", scores)

T114 | Bhumika Shelar
Ranking order (best to worst): [2 0 1 3]
Ranking scores: [1.78667919 1.17335813 2.80002491 0.29335396]


In [None]:
print("T114 | Bhumika Shelar")
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.metrics import ndcg_score
def generate_dataset():
    """Build the small hard-coded learning-to-rank dataset.

    Returns:
        A tuple ``(X, relevance, queries)`` of NumPy arrays:
        ``X`` holds six documents with three features each,
        ``relevance`` holds one graded relevance label per document, and
        ``queries`` holds the query id each document belongs to
        (three documents for query 1, three for query 2).
    """
    queries = np.array([1, 1, 1, 2, 2, 2])
    relevance = np.array([3, 2, 1, 3, 1, 2])
    features = np.array([
        # documents of query 1
        [3, 2, 1],
        [2, 1, 0],
        [0, 1, 2],
        # documents of query 2
        [1, 2, 0],
        [2, 1, 3],
        [1, 0, 2],
    ])
    return features, relevance, queries
def create_pairwise_data(X, relevance, queries):
    """Convert per-document relevance labels into pairwise training examples.

    Documents are only compared with documents of the same query. For every
    ordered pair ``(a, b)`` whose relevance labels differ, the feature
    difference ``X[a] - X[b]`` is emitted with label ``1`` when document
    ``a`` is the more relevant one and ``-1`` otherwise. Both orderings of a
    pair are visited, so each preference appears once with each sign.

    Args:
        X: Feature array indexed by document (one row per document).
        relevance: Relevance label of each document.
        queries: Query id of each document.

    Returns:
        A tuple ``(X_pairs, y_pairs)`` of NumPy arrays holding the feature
        differences and their ``1`` / ``-1`` labels. Both arrays are empty
        when no pair of documents within a query differs in relevance.
    """
    differences = []
    preferences = []
    for query_id in np.unique(queries):
        members = np.nonzero(queries == query_id)[0]
        for a in members:
            for b in members:
                if not relevance[a] != relevance[b]:
                    # Ties (and a document paired with itself) say nothing
                    # about ordering.
                    continue
                differences.append(X[a] - X[b])
                preferences.append(1 if relevance[a] > relevance[b] else -1)
    return np.array(differences), np.array(preferences)
def train_rank_svm(X_pairs, y_pairs):
    """Fit a linear SVM on pairwise examples and return it.

    Args:
        X_pairs: Pairwise feature differences, one row per pair.
        y_pairs: Label of each pair (``1`` or ``-1`` as produced by
            ``create_pairwise_data``).

    Returns:
        The fitted ``LinearSVC`` instance, created with default settings.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    return LinearSVC().fit(X_pairs, y_pairs)
def evaluate_model(model, X, relevance, queries):
    """Print the NDCG of the model's ranking, averaged over queries.

    For each distinct query id, the documents of that query are scored with
    ``model.decision_function`` and compared against their true relevance
    labels using ``ndcg_score``. The mean of the per-query values is printed
    with four decimal places; nothing is returned.

    Args:
        model: Fitted estimator exposing ``decision_function``.
        X: Feature array with one row per document.
        relevance: True relevance label of each document.
        queries: Query id of each document.
    """
    per_query_ndcg = []
    for query_id in np.unique(queries):
        in_query = queries == query_id
        predicted = model.decision_function(X[in_query])
        # ndcg_score expects 2-D inputs (one row per query), hence the
        # single-element list wrappers.
        per_query_ndcg.append(ndcg_score([relevance[in_query]], [predicted]))
    average_ndcg = np.mean(per_query_ndcg)
    print(f"Average NDCG Score: {average_ndcg:.4f}")
def main():
    """Run the full pipeline: build data, train on pairs, report NDCG."""
    features, labels, query_ids = generate_dataset()
    pair_features, pair_labels = create_pairwise_data(features, labels, query_ids)
    ranker = train_rank_svm(pair_features, pair_labels)
    # The model is evaluated on the same documents it was trained from.
    evaluate_model(ranker, features, labels, query_ids)


if __name__ == "__main__":
    main()

T114 | Bhumika Shelar
Average NDCG Score: 0.9475


In [None]:
print("T114 | Bhumika Shelar")
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.metrics import ndcg_score

# Step 1: Dataset -- six documents with three features each
X = np.array([
    [3, 2, 1],
    [2, 1, 0],
    [0, 1, 2],
    [1, 2, 0],
    [2, 1, 3],
    [1, 0, 2],
])
relevance = np.array([3, 2, 1, 3, 1, 2])  # graded relevance per document
queries = np.array([1, 1, 1, 2, 2, 2])    # query id per document

# Step 2: Create pairwise data
# Only documents of the same query are compared. Each ordered pair (i, j)
# with different relevance yields X[i] - X[j], labelled +1 when doc i is the
# more relevant one and -1 otherwise.
X_pairs, y_pairs = [], []

for q in np.unique(queries):
    idx = np.nonzero(queries == q)[0]
    for i in idx:
        for j in idx:
            if relevance[i] == relevance[j]:
                # Ties (including i == j) carry no preference.
                continue
            X_pairs.append(X[i] - X[j])
            y_pairs.append(1 if relevance[i] > relevance[j] else -1)

X_pairs = np.array(X_pairs)
y_pairs = np.array(y_pairs)

# Step 3: Train RankSVM -- a linear SVM fitted on the pairwise differences
model = LinearSVC().fit(X_pairs, y_pairs)

# Step 4: Evaluate using NDCG, computed per query and then averaged
ndcg_scores = []
for q in np.unique(queries):
    in_query = queries == q
    scores = model.decision_function(X[in_query])
    # ndcg_score expects 2-D inputs, one row per query.
    ndcg = ndcg_score([relevance[in_query]], [scores])
    ndcg_scores.append(ndcg)

print("Average NDCG Score:", np.mean(ndcg_scores))

T114 | Bhumika Shelar
Average NDCG Score: 0.947499501061509
