In [None]:
import numpy as np  
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,classification_report
import json
import os 
import pickle

In [3]:
def _stack_as_float32(nested):
    """Stack the entries of ``nested`` along the first axis and cast to float32."""
    return np.vstack(nested).astype(np.float32)


# Read the stored embedding arrays for both splits.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only point these paths at files produced by a trusted pipeline.
raw_train_embeddings = np.load("../embeddings/train_embeddings.npy", allow_pickle=True)
raw_val_embeddings = np.load("../embeddings/val_embeddings.npy", allow_pickle=True)

# The saved arrays hold nested per-sample arrays; stack them into one
# contiguous float matrix per split.
train_embeddings = _stack_as_float32(raw_train_embeddings)
val_embeddings = _stack_as_float32(raw_val_embeddings)

print("Train shape:", train_embeddings.shape)
print("Val shape:", val_embeddings.shape)



Train shape: (3, 49536)
Val shape: (3, 49536)


In [4]:
# Project root: the parent of the notebook's current working directory.
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Load labels from JSON. The encoding is given explicitly so decoding does
# not depend on the platform's default locale.
with open(os.path.join(BASE_DIR, "data/train/train_dataset.json"), "r", encoding="utf-8") as f:
    train_data = json.load(f)
with open(os.path.join(BASE_DIR, "data/val/val_dataset.json"), "r", encoding="utf-8") as f:
    val_data = json.load(f)

# Each sample's target label is stored under "expected_output".
y_train = [s["expected_output"] for s in train_data]
y_val = [s["expected_output"] for s in val_data]

# Encode string labels to integers. The encoder is fitted on the union of
# both splits so that transform() does not fail on labels that occur only
# in the validation set.
all_labels = y_train + y_val
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)
y_train_encoded = label_encoder.transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

# Fitting on the union hides a data problem: a classifier trained on y_train
# can never predict a label it has not seen, so surface such labels here.
unseen_val_labels = sorted(set(y_val) - set(y_train))
if unseen_val_labels:
    print("Warning: validation labels never seen in training:", unseen_val_labels)

print("Classes:", label_encoder.classes_)


Classes: ['apple' 'apple2' 'bike' 'bike2' 'car' 'tower']


In [5]:
# Fit a logistic-regression model on the training embeddings (fit() returns
# the estimator itself), then predict the validation split.
clf = LogisticRegression(max_iter=1000).fit(train_embeddings, y_train_encoded)
y_pred = clf.predict(val_embeddings)

# Distinct encoded labels present in the validation split, in ascending order.
val_labels = list(set(y_val_encoded))
val_labels.sort()

In [6]:
# Overall accuracy on the validation split.
val_accuracy = accuracy_score(y_val_encoded, y_pred)
print("Accuracy:", val_accuracy)

# Per-class metrics, restricted to the labels that occur in the validation
# split; zero_division=0 reports 0 for undefined metrics.
val_class_names = [label_encoder.classes_[idx] for idx in val_labels]
report_text = classification_report(
    y_val_encoded,
    y_pred,
    labels=val_labels,
    target_names=val_class_names,
    zero_division=0,
)
print("\nClassification Report:\n", report_text)

Accuracy: 0.0

Classification Report:
               precision    recall  f1-score   support

      apple2       0.00      0.00      0.00       1.0
       bike2       0.00      0.00      0.00       1.0
         car       0.00      0.00      0.00       1.0

   micro avg       0.00      0.00      0.00       3.0
   macro avg       0.00      0.00      0.00       3.0
weighted avg       0.00      0.00      0.00       3.0



In [7]:


# Save models in the root 'models' folder, one level above the notebook's
# working directory. Previously "../models" was created but the files were
# written to a separate cwd-relative "models" directory instead.
MODELS_DIR = "../models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Save classifier
# NOTE: pickle files can execute code on load; only unpickle these artefacts
# from a location you trust.
with open(os.path.join(MODELS_DIR, "logreg_classifier.pkl"), "wb") as f:
    pickle.dump(clf, f)

# Save label encoder (needed to map predicted integers back to label strings)
with open(os.path.join(MODELS_DIR, "label_encoder.pkl"), "wb") as f:
    pickle.dump(label_encoder, f)

print("Classifier & Label Encoder saved successfully")


Classifier & Label Encoder saved successfully


In [79]:

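# Debugging aid (disabled): inspect the raw saved training embeddings
# before they are stacked. Uncomment to run.
# import numpy as np

# arr = np.load("../embeddings/train_embeddings.npy", allow_pickle=True)
# print("Shape:", arr.shape)
# print("Dtype:", arr.dtype)
# print("First element type:", type(arr[0]))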
