In [3]:
conda install -c conda-forge xgboost=1.7

Channels:
 - conda-forge
 - defaults
 - anaconda
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: d:\Anaconda\envs\py_gpu

  added / updated specs:
    - xgboost=1.7


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _py-xgboost-mutex-2.0      |            cpu_0          11 KB  conda-forge
    libxgboost-1.7.6           |   cpu_h0f92c63_6         1.1 MB  conda-forge
    py-xgboost-1.7.6           |cpu_py310h09d19d8_6         208 KB  conda-forge
    xgboost-1.7.6              |cpu_py310h09d19d8_6          15 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         1.3 MB

The following NEW packages will be INSTALLED:

  _py-xgboost-mutex  conda-forge/win-64::_py-xgboost-mutex-2.0-cpu_0 

The fo

In [1]:
# ✅ Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from xgboost import XGBClassifier

# 📊 Load Dataset
df = pd.read_csv('waste_product_v1.csv')  # your generated dataset

# 🧱 Preprocessing
# Encode the project names as integer class ids (0..n_classes-1) for XGBoost.
target_encoder = LabelEncoder()
df['Project_Label'] = target_encoder.fit_transform(df['Project'])


def _split_items(cell):
    """Turn one comma-separated CSV cell into a list of clean item names.

    Parameters
    ----------
    cell : str or missing value
        Raw cell content such as ``"Glue, Scissors"``.

    Returns
    -------
    list of str
        Whitespace-stripped, non-empty tokens. A missing cell (NaN/None,
        which pandas produces for empty CSV fields) yields an empty list
        instead of raising ``AttributeError`` on ``float.split``.
    """
    if pd.isna(cell):
        return []
    # Drop empty tokens so trailing/double commas do not create a '' feature.
    return [token.strip() for token in str(cell).split(',') if token.strip()]


df['Waste_Items_List'] = df['Waste Items'].apply(_split_items)
df['Tools_List'] = df['Tools'].apply(_split_items)

# One multi-hot encoder per column so inference can reuse the same vocabularies.
mlb_waste = MultiLabelBinarizer()
mlb_tools = MultiLabelBinarizer()

waste_features = mlb_waste.fit_transform(df['Waste_Items_List'])
tools_features = mlb_tools.fit_transform(df['Tools_List'])

# Feature matrix: waste-item indicator columns followed by tool indicator columns.
X = np.hstack([waste_features, tools_features])
y = df['Project_Label'].values

# Split dataset
# Stratified 80/20 split; note that stratify=y requires at least two rows per project.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ✅ XGBoost Model
# `early_stopping_rounds` is passed to the constructor: since XGBoost 1.6 the
# `fit(early_stopping_rounds=...)` form is deprecated (and removed in later
# releases). The deprecated `use_label_encoder` flag is dropped for the same
# reason; labels are already integer-encoded by `target_encoder`.
xgb_model = XGBClassifier(
    objective='multi:softprob',  # multi-class classification with probability output
    num_class=len(np.unique(y)),
    eval_metric='mlogloss',
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    early_stopping_rounds=10,  # stop once validation mlogloss has not improved for 10 rounds
    random_state=42,
)

# ⏱ Train
# NOTE(review): the validation split drives early stopping AND is scored below,
# so the reported metrics are optimistic; hold out a separate test set for a
# final estimate.
# NOTE(review): in the recorded run the validation mlogloss increased from the
# very first round and accuracy was 0.0237, i.e. the model did not learn a
# usable mapping from these features — check the dataset (e.g. identical
# item/tool sets mapped to different projects) before tuning hyper-parameters.
xgb_model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=True)

# ✅ Evaluation
y_pred = xgb_model.predict(X_valid)
accuracy = accuracy_score(y_valid, y_pred)
# zero_division=0 yields the same score as the default but without an
# UndefinedMetricWarning for classes that are never predicted.
f1 = f1_score(y_valid, y_pred, average='weighted', zero_division=0)

print(f'Validation Accuracy: {accuracy:.4f}')
print(f'Validation F1-Score: {f1:.4f}')

# ✅ Inference
def predict_project(waste_items, tools):
    """Suggest a project name for a set of waste items and tools.

    Parameters
    ----------
    waste_items : iterable of str, or str
        Waste item names. A single comma-separated string (the format used in
        the training CSV) is also accepted and split on commas.
    tools : iterable of str, or str
        Tool names, in the same formats as ``waste_items``.

    Returns
    -------
    The project name decoded by ``target_encoder`` for the class predicted by
    ``xgb_model``.

    Notes
    -----
    Names are whitespace-stripped and empty names are dropped before encoding.
    Names the fitted binarizers have never seen are ignored by
    ``MultiLabelBinarizer.transform`` (scikit-learn emits a warning).
    """
    def _clean(items):
        # A bare string would otherwise be iterated character by character.
        if isinstance(items, str):
            items = items.split(',')
        stripped = (str(item).strip() for item in items)
        return [name for name in stripped if name]

    waste_vec = mlb_waste.transform([_clean(waste_items)])
    tools_vec = mlb_tools.transform([_clean(tools)])
    # Column order must match training: waste indicators, then tool indicators.
    X_input = np.hstack([waste_vec, tools_vec])

    pred_label = xgb_model.predict(X_input)[0]
    project_name = target_encoder.inverse_transform([pred_label])[0]
    return project_name

# Example
# Query the trained model with a small hand-written set of items and tools.
new_waste_items = ["Ribbon Scrap", "Rope Piece"]
new_tools = ["Glue", "Scissors"]

predicted_project = predict_project(waste_items=new_waste_items, tools=new_tools)
print(f"Suggested DIY Project: {predicted_project}")




[0]	validation_0-mlogloss:3.80668
[1]	validation_0-mlogloss:3.80690
[2]	validation_0-mlogloss:3.80713
[3]	validation_0-mlogloss:3.80746
[4]	validation_0-mlogloss:3.80778
[5]	validation_0-mlogloss:3.80815
[6]	validation_0-mlogloss:3.80876
[7]	validation_0-mlogloss:3.80920
[8]	validation_0-mlogloss:3.80974
[9]	validation_0-mlogloss:3.81018
Validation Accuracy: 0.0237
Validation F1-Score: 0.0231
Suggested DIY Project: Creative Pouch


