# Using KNN for one-shot/few-shot

In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler

# === Step 1: Load dataset ===
df = pd.read_csv("data.csv")

# === Step 2: One-shot labeled data - 1 sample per class ===
one_shot_labeled = df.groupby('label').apply(lambda x: x.sample(1, random_state=42)).reset_index(drop=True)
unlabeled_data = df.drop(one_shot_labeled.index)

# === Step 3: Define features and scale ===
features = ['age', 'physical_activity_days', 'processed_food_meals', 
            'sleep_hours', 'smoking_status', 'alcohol_consumption']

scaler = StandardScaler()

X_labeled = scaler.fit_transform(one_shot_labeled[features])
y_labeled = one_shot_labeled['label']

X_unlabeled = scaler.transform(unlabeled_data[features])
y_unlabeled_true = unlabeled_data['label']

# === Step 4: Train KNN with n_neighbors=1 to avoid errors ===
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_labeled, y_labeled)

# === Step 5: Predict and evaluate ===
y_pred = knn.predict(X_unlabeled)
f1 = f1_score(y_unlabeled_true, y_pred)
print(f"KNN One-Shot F1 Score: {f1:.4f}")


  one_shot_labeled = df.groupby('label').apply(lambda x: x.sample(1, random_state=42)).reset_index(drop=True)


KNN One-Shot F1 Score: 0.5688


# 2. Using SMOTE for data augmentation (few-shot scenario)


In [3]:
# Import necessary libraries
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv("data.csv")

# Simulate small labeled set: 10% labeled data
np.random.seed(42)
df['label_known'] = 0
known_indices = np.random.choice(df.index, size=int(0.1 * len(df)), replace=False)
df.loc[known_indices, 'label_known'] = 1

labeled_data = df[df['label_known'] == 1]
unlabeled_data = df[df['label_known'] == 0]

features = ['age', 'physical_activity_days', 'processed_food_meals', 
            'sleep_hours', 'smoking_status', 'alcohol_consumption']

# Scale features
scaler = StandardScaler()
X_labeled = scaler.fit_transform(labeled_data[features])
y_labeled = labeled_data['label']

# Apply SMOTE to augment minority class samples
smote = SMOTE(random_state=42)
X_labeled_resampled, y_labeled_resampled = smote.fit_resample(X_labeled, y_labeled)

# Prepare unlabeled data
X_unlabeled = scaler.transform(unlabeled_data[features])
y_unlabeled_true = unlabeled_data['label']

# Train RandomForest on augmented data
model = RandomForestClassifier(random_state=42)
model.fit(X_labeled_resampled, y_labeled_resampled)

# Predict and evaluate
y_pred = model.predict(X_unlabeled)
print(f"F1 Score after SMOTE: {f1_score(y_unlabeled_true, y_pred):.4f}")


F1 Score after SMOTE: 0.5867


# 3. Hyperparameter tuning for KNN with GridSearchCV

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np

df = pd.read_csv("data.csv")

# Use 10% labeled for training
np.random.seed(42)
df['label_known'] = 0
known_indices = np.random.choice(df.index, size=int(0.1 * len(df)), replace=False)
df.loc[known_indices, 'label_known'] = 1

labeled_data = df[df['label_known'] == 1]
unlabeled_data = df[df['label_known'] == 0]

features = ['age', 'physical_activity_days', 'processed_food_meals', 
            'sleep_hours', 'smoking_status', 'alcohol_consumption']

scaler = StandardScaler()
X_labeled = scaler.fit_transform(labeled_data[features])
y_labeled = labeled_data['label']

X_unlabeled = scaler.transform(unlabeled_data[features])
y_unlabeled_true = unlabeled_data['label']

knn = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [1, 3, 5, 7],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

grid_search = GridSearchCV(knn, param_grid, scoring='f1', cv=3)
grid_search.fit(X_labeled, y_labeled)

best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_unlabeled)
print(f"Best KNN params: {grid_search.best_params_}")
print(f"F1 Score with tuned KNN: {f1_score(y_unlabeled_true, y_pred):.4f}")


Best KNN params: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}
F1 Score with tuned KNN: 0.5338
