In [2]:
!pip3 install tabpfn



In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch

# Load your dataset
df = pd.read_csv("C:/Users/R.Parsad/Downloads/TabPFN/Car/car.csv")

# Separate features and target
X = df.drop("Class", axis=1)
y = df["Class"]

# Encode categorical features (simple label encoding)
for col in X.select_dtypes(include=['object', 'category']).columns:
    X[col] = LabelEncoder().fit_transform(X[col])

# Encode target if it's categorical
if y.dtype == "object" or str(y.dtype).startswith("category"):
    y = LabelEncoder().fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X.values, y, test_size=0.2, random_state=42)


In [4]:
from tabpfn import TabPFNClassifier
from sklearn.metrics import accuracy_score

import os
os.environ["TABPFN_ALLOW_CPU_LARGE_DATASET"] = "1"

# Initialize model
clf = TabPFNClassifier(device='cuda' if torch.cuda.is_available() else 'cpu')

# Fit on training data
clf.fit(X_train, y_train)

# Predict
y_pred = clf.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.9855491329479769


In [5]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, classification_report, confusion_matrix
)

# Predictions
y_pred = clf.predict(X_test)

# Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision (macro):", precision_score(y_test, y_pred, average='macro'))
print("Recall (macro):", recall_score(y_test, y_pred, average='macro'))
print("F1 Score (macro):", f1_score(y_test, y_pred, average='macro'))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9855491329479769
Precision (macro): 0.9426482571516221
Recall (macro): 0.9732459248759744
F1 Score (macro): 0.9562612870946512

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.95      0.97        83
           1       0.85      1.00      0.92        11
           2       1.00      1.00      1.00       235
           3       0.94      0.94      0.94        17

    accuracy                           0.99       346
   macro avg       0.94      0.97      0.96       346
weighted avg       0.99      0.99      0.99       346

Confusion Matrix:
 [[ 79   2   1   1]
 [  0  11   0   0]
 [  0   0 235   0]
 [  1   0   0  16]]


## Testing a custom neural network


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pandas as pd
import numpy as np

# Load your dataset
df = pd.read_csv("C:/Users/R.Parsad/Downloads/TabPFN/Car/car.csv")
X = df.drop("Class", axis=1)
y = df["Class"]

# Encode categorical features
for col in X.select_dtypes(include=["object", "category"]).columns:
    X[col] = LabelEncoder().fit_transform(X[col])
if y.dtype == "object" or str(y.dtype).startswith("category"):
    y = LabelEncoder().fit_transform(y)

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)

# Define the neural network
class SimpleNN(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(SimpleNN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes)
        )

    def forward(self, x):
        return self.model(x)

model = SimpleNN(input_dim=X.shape[1], num_classes=len(np.unique(y)))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 20
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        outputs = model(xb)
        loss = criterion(outputs, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs} - Loss: {running_loss/len(train_loader):.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor.to(device))
    _, y_pred = torch.max(outputs, 1)
    y_pred = y_pred.cpu().numpy()

# Metrics
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Epoch 1/20 - Loss: 1.2180
Epoch 2/20 - Loss: 0.9277
Epoch 3/20 - Loss: 0.7940
Epoch 4/20 - Loss: 0.7315
Epoch 5/20 - Loss: 0.7117
Epoch 6/20 - Loss: 0.6826
Epoch 7/20 - Loss: 0.6696
Epoch 8/20 - Loss: 0.6537
Epoch 9/20 - Loss: 0.6395
Epoch 10/20 - Loss: 0.6202
Epoch 11/20 - Loss: 0.6068
Epoch 12/20 - Loss: 0.5867
Epoch 13/20 - Loss: 0.5679
Epoch 14/20 - Loss: 0.5472
Epoch 15/20 - Loss: 0.5256
Epoch 16/20 - Loss: 0.5187
Epoch 17/20 - Loss: 0.4980
Epoch 18/20 - Loss: 0.4788
Epoch 19/20 - Loss: 0.4665
Epoch 20/20 - Loss: 0.4462

Classification Report:
               precision    recall  f1-score   support

           0       0.62      0.58      0.60        83
           1       0.00      0.00      0.00        11
           2       0.87      0.98      0.92       235
           3       1.00      0.24      0.38        17

    accuracy                           0.82       346
   macro avg       0.62      0.45      0.48       346
weighted avg       0.79      0.82      0.79       346

Confusion

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Testing SVM on the dataset

In [8]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

# Load dataset
df = pd.read_csv("C:/Users/R.Parsad/Downloads/TabPFN/Car/car.csv")
X = df.drop("Class", axis=1)
y = df["Class"]

# Encode categorical features
for col in X.select_dtypes(include=["object", "category"]).columns:
    X[col] = LabelEncoder().fit_transform(X[col])

# Encode target if necessary
if y.dtype == "object" or str(y.dtype).startswith("category"):
    y = LabelEncoder().fit_transform(y)

# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train SVM
svm_clf = SVC(kernel='rbf', C=1.0, gamma='scale')  # try kernel='linear' or 'poly' too
svm_clf.fit(X_train, y_train)

# Predict
y_pred = svm_clf.predict(X_test)

# Metrics
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.81      0.80        83
           1       0.44      0.36      0.40        11
           2       0.95      0.96      0.96       235
           3       0.94      0.88      0.91        17

    accuracy                           0.90       346
   macro avg       0.78      0.75      0.77       346
weighted avg       0.90      0.90      0.90       346

Confusion Matrix:
 [[ 67   5  11   0]
 [  6   4   0   1]
 [ 10   0 225   0]
 [  2   0   0  15]]
