In [11]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib  # For saving label encoder & scaler

In [None]:
# Load dataset
df = pd.read_csv("figma_dataset.csv")

In [13]:

# Drop unnecessary columns (assuming `tag` is the target)
X = df.drop(columns=["tag"])  # Features
y = df["tag"]  # Target variable

In [None]:
# Convert categorical features to numerical values
categorical_cols = ["type", "parent_tag","characters", "font_name"]  # Adjust as needed
for col in categorical_cols:
    X[col] = X[col].astype(str)  # Ensure all categorical data is string
    X[col] = LabelEncoder().fit_transform(X[col])

In [15]:
# Fill missing values (NaNs) with 0
X = X.fillna(0)

In [16]:
# Normalize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


ValueError: Input X contains infinity or a value too large for dtype('float64').

In [None]:
# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [None]:
# Save encoders
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(scaler, "scaler.pkl")

['scaler.pkl']

In [None]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

In [None]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [None]:
# Define Neural Network Model
class TagClassifier(nn.Module):
    def __init__(self, input_size, output_size):
        super(TagClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
# Initialize model
input_size = X_train.shape[1]
output_size = len(label_encoder.classes_)
model = TagClassifier(input_size, output_size)

In [None]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop
num_epochs = 200
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/200], Loss: 4.9893
Epoch [20/200], Loss: 4.6826
Epoch [30/200], Loss: 4.1384
Epoch [40/200], Loss: 3.3126
Epoch [50/200], Loss: 2.4524
Epoch [60/200], Loss: 2.0758
Epoch [70/200], Loss: 1.9693
Epoch [80/200], Loss: 1.9193
Epoch [90/200], Loss: 1.8737
Epoch [100/200], Loss: 1.8381
Epoch [110/200], Loss: 1.8013
Epoch [120/200], Loss: 1.7632
Epoch [130/200], Loss: 1.7236
Epoch [140/200], Loss: 1.6844
Epoch [150/200], Loss: 1.6466
Epoch [160/200], Loss: 1.6109
Epoch [170/200], Loss: 1.5772
Epoch [180/200], Loss: 1.5447
Epoch [190/200], Loss: 1.5127
Epoch [200/200], Loss: 1.4810


In [None]:
# Save model
torch.save(model.state_dict(), "tag_classifier.pth")

In [None]:
# Evaluation
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)
    y_pred_classes = torch.argmax(y_pred, dim=1).numpy()

In [None]:
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.5951


In [None]:
# Generate classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred_classes, labels=np.unique(y_test),target_names=label_encoder.inverse_transform(np.unique(y_test))))



Classification Report:
                                           precision    recall  f1-score   support

                                        A       0.80      0.55      0.65      3120
                                     ABBR       0.00      0.00      0.00         3
                                  ADDRESS       0.00      0.00      0.00        17
ADS-HOME-PAGE-EDITORIAL-SPOTLIGHT-MANAGER       0.00      0.00      0.00         1
     ADS-HOME-PAGE-FEATURED-MEDIA-MANAGER       0.00      0.00      0.00         1
                                  ARTICLE       0.00      0.00      0.00       183
                                    ASIDE       0.00      0.00      0.00         9
          AUTH-FLOW-GOOGLE-ONE-TAP-PROMPT       0.00      0.00      0.00         1
                                        B       0.00      0.00      0.00         7
                                     BODY       0.88      0.35      0.50        20
                                   BUTTON       0.87      0.07

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Load model (if needed)
loaded_model = TagClassifier(input_size, output_size)
loaded_model.load_state_dict(torch.load("tag_classifier.pth"))
loaded_model.eval()

  loaded_model.load_state_dict(torch.load("tag_classifier.pth"))


TagClassifier(
  (fc1): Linear(in_features=43, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=196, bias=True)
  (relu): ReLU()
)