In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load data
# Input locations are kept in constants so they can be changed in one place.
TRAIN_CSV = '/kaggle/input/manifest/manifest.csv'
VAL_CSV = '/kaggle/input/test-train/val.csv'

train = pd.read_csv(TRAIN_CSV)
val = pd.read_csv(VAL_CSV)

# Extract features/labels
# Missing text is replaced with an empty string *before* the str cast:
# astype(str) on its own turns NaN into the literal token "nan", which the
# TF-IDF vectorizer would then count as a real word.
X_train = train['text'].fillna('').astype(str)
y_train = train['label']

X_val = val['text'].fillna('').astype(str)
y_val = val['label']

# Encode labels
# The encoder is fitted on the training labels only; the validation labels are
# mapped with that same fitted encoder so both splits share one integer coding.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc = le.transform(y_val)

# Text -> TF-IDF vectors (unigrams + bigrams, vocabulary capped at 5000 terms)
# The vectorizer is fitted on the training text only and then applied unchanged
# to the validation text.
tfidf = TfidfVectorizer(max_features=5000, ngram_range=(1,2))
X_train_vec = tfidf.fit_transform(X_train)
X_val_vec = tfidf.transform(X_val)

# Train XGBoost model
# subsample / colsample_bytree below 1.0 make tree construction draw random
# row/column samples, so the seed is pinned explicitly instead of being left
# implicit.
# NOTE(review): 0 is the default `seed` listed in the XGBoost parameter docs,
# so this is expected to reproduce the previous results rather than change
# them -- re-run once to confirm the reported metrics are unchanged.
model = XGBClassifier(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric='mlogloss',
    random_state=0,
)

# Fit on the TF-IDF matrix and the integer-encoded training labels
model.fit(X_train_vec, y_train_enc)

# Predict
pred = model.predict(X_val_vec)

# Metrics
# `labels` is passed explicitly so the report always has one row per class the
# encoder knows about. Without it, classification_report derives the label set
# from y_val_enc/pred, and a class missing from both would make the number of
# rows disagree with the length of `target_names`.
# Class names are cast to str because target_names is used as display text.
class_ids = list(range(len(le.classes_)))
class_names = [str(name) for name in le.classes_]

print("Accuracy:", accuracy_score(y_val_enc, pred))
print(classification_report(y_val_enc, pred, labels=class_ids, target_names=class_names))


Accuracy: 0.9750499001996008
              precision    recall  f1-score   support

       email       1.00      0.98      0.99       513
        form       0.98      0.96      0.97       501
     invoice       0.94      0.97      0.95       496
      letter       0.99      0.97      0.98       493
news_article       0.95      0.98      0.96       492
      resume       1.00      0.99      1.00       511

    accuracy                           0.98      3006
   macro avg       0.98      0.97      0.97      3006
weighted avg       0.98      0.98      0.98      3006



In [2]:
import os

import joblib

# Make sure the output directory for the serialized artifacts exists
os.makedirs("model", exist_ok=True)

# Persist the classifier, the TF-IDF vectorizer and the label encoder,
# one file per object, in that order
artifacts = (
    (model, "model/xgb_model_new.pkl"),
    (tfidf, "model/tfidf_vectorizer_new.pkl"),
    (le, "model/label_encoder_new.pkl"),
)
for fitted_obj, target_path in artifacts:
    joblib.dump(fitted_obj, target_path)
print("Model saved successfully!")


Model saved successfully!
