# Q1. (30 points)
Implement a Generalized Context Model (GCM) that encodes these assumptions and produce quantitative predictions on the test set.
Submit both the code and the predicted category responses for the test data points.

In [2]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist

In [None]:
# Training exemplars: two feature columns plus a category label.
# Neither CSV is expected to carry a header row, so column names are supplied.
X_df = pd.read_csv("X.csv", names=["weight", "height", "label"])
# Test stimuli: the same two feature columns, no label.
y_df = pd.read_csv("y.csv", names=["weight", "height"])

# Plain NumPy views of the frames, which is what the model code consumes.
X_features = X_df[["weight", "height"]].to_numpy()
X_labels = X_df["label"].to_numpy()
y_features = y_df.to_numpy()


# Generalized Context Model (GCM)

In [4]:
def calculate_similarity(test_point, train_features, train_labels,
                         alpha_weight=2, alpha_height=1, beta=1,
                         large_bias=0.8):
    """Classify one test point with an exemplar-similarity (GCM-style) rule.

    Each training exemplar contributes ``exp(-beta * d)`` to the evidence of
    its own category, where ``d`` is the attention-weighted city-block
    distance between the exemplar and ``test_point``. The evidence for
    category 3 ("large") is then scaled by ``large_bias`` and the category
    with the highest total evidence is returned.

    Args:
        test_point: The two feature values (weight, height) of the stimulus
            to classify.
        train_features: Training exemplars, one row of (weight, height) per
            exemplar.
        train_labels: Category label of each exemplar, one entry per row of
            ``train_features``; 1 = small, 2 = average, 3 = large.
        alpha_weight: Attention weight applied to the weight dimension.
        alpha_height: Attention weight applied to the height dimension.
        beta: Sensitivity (steepness) of the exponential similarity gradient.
        large_bias: Multiplier applied to the summed similarity of category 3.
            The default of 0.8 encodes the "politeness" assumption that
            responders are reluctant to answer "large".

    Returns:
        The winning category label: 1, 2 or 3. When several categories tie
        (including the case of no training exemplars at all, where every
        total is zero) the lowest label among the tied ones is returned.
    """
    # Attention weights are per feature dimension, not per training point.
    alpha = np.array([alpha_weight, alpha_height], dtype=float)

    exemplars = np.asarray(train_features, dtype=float)
    labels = np.asarray(train_labels)
    probe = np.asarray(test_point, dtype=float)

    if exemplars.size == 0:
        # No exemplars: every category ends up with zero evidence.
        similarities = np.zeros(0)
    else:
        # Weighted city-block distance from the probe to every exemplar.
        distances = (alpha * np.abs(exemplars - probe)).sum(axis=1)
        # Exponential decay of similarity with distance.
        similarities = np.exp(-beta * distances)

    # Summed similarity per category.
    small_sim = float(similarities[labels == 1].sum())
    average_sim = float(similarities[labels == 2].sum())
    large_sim = float(similarities[labels == 3].sum())

    # Politeness bias: down-weight (by default) the evidence for "large".
    large_sim *= large_bias

    # Dict insertion order makes ties resolve to the lowest label.
    similarities_dict = {1: small_sim, 2: average_sim, 3: large_sim}
    return max(similarities_dict, key=similarities_dict.get)


In [5]:
# Classify every test stimulus against the full training set using the
# model's default attention weights, sensitivity and politeness bias.
predictions = [
    calculate_similarity(probe, X_features, X_labels)
    for probe in y_features
]
predictions


[3, 2, 3, 2, 2, 3, 2, 2, 2, 1]

In [None]:
# Write one predicted category label per line: no header row, no index column.
predictions_df = pd.DataFrame(predictions)
predictions_df.to_csv("Q1_Category_Label.csv", header=False, index=False)
print("Predictions are saved to Q1_Category_Label.csv")