In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

# Train a binary Won/Lost status classifier on the copper dataset, print
# evaluation metrics, and pickle the fitted model together with its scaler.

# Offset added before taking logs (kept identical to the original value so the
# saved scaler/model stay compatible with code that applies the same transform).
LOG_EPS = 1e-6

# Load dataset. low_memory=False makes pandas infer each column's dtype from
# the whole file; columns with stray non-numeric entries are coerced below.
df = pd.read_csv("data/Copper_Set.csv", low_memory=False)

# Mappings
status_mapping = {'Lost': 0, 'Won': 1}
item_type_mapping = {'W': 1, 'WI': 2, 'S': 3, 'Others': 4, 'PL': 5, 'IPL': 6, 'SLAWR': 7}

# Keep only rows with a Won/Lost outcome. The explicit copy avoids assigning
# into a slice of the original frame (pandas SettingWithCopyWarning).
df = df[df['status'].isin(list(status_mapping))].copy()
df['status'] = df['status'].map(status_mapping)
df['item type'] = df['item type'].map(item_type_mapping)

# Convert expected numeric columns; unparseable entries become NaN.
numeric_cols = ['quantity tons', 'selling_price', 'application', 'thickness', 'width',
                'country', 'customer', 'product_ref']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Drop rows with missing values in features. 'item type' is included because
# values absent from item_type_mapping become NaN after .map() and it is a
# model feature as well.
df = df.dropna(subset=numeric_cols + ['item type'])

# Log transformations. Rows where value + LOG_EPS is not strictly positive are
# removed first: their logarithm is NaN or -inf (and triggers NumPy
# RuntimeWarnings), which would otherwise flow into the scaler and the model.
log_source_cols = ['quantity tons', 'selling_price', 'thickness']
log_is_defined = (df[log_source_cols] + LOG_EPS > 0).all(axis=1)
df = df[log_is_defined].copy()

df['quantity_log'] = np.log(df['quantity tons'] + LOG_EPS)
df['selling_price_log'] = np.log(df['selling_price'] + LOG_EPS)
df['thickness_log'] = np.log(df['thickness'] + LOG_EPS)

# Features and target
X = df[['quantity_log', 'selling_price_log', 'item type', 'application', 'thickness_log',
        'width', 'country', 'customer', 'product_ref']]
y = df['status']

# Split and scale. Stratifying keeps the Won/Lost ratio the same in the train
# and test partitions, so the reported metrics reflect the real class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The scaler is fitted on the training partition only and reused for the test
# partition, so no test-set statistics leak into training.
scaler_clf = StandardScaler()
X_train_scaled = scaler_clf.fit_transform(X_train)
X_test_scaled = scaler_clf.transform(X_test)

# Handle class imbalance
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weights = dict(zip(classes, weights))

# XGBoost's scale_pos_weight multiplies the weight of the positive class (1);
# its documented typical value is sum(negative) / sum(positive). Balanced
# weights are n_samples / (n_classes * n_c), so weight[1] / weight[0] equals
# n_negative / n_positive. The previous weights[0] / weights[1] was the
# inverse and boosted whichever class was already more frequent.
scale_pos_weight = class_weights[1] / class_weights[0]

# Model training
model_clf = XGBClassifier(n_estimators=150, max_depth=6, random_state=42,
                          scale_pos_weight=scale_pos_weight)
model_clf.fit(X_train_scaled, y_train)

# Evaluation
y_pred = model_clf.predict(X_test_scaled)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save model and scaler. Inference code must apply the same log transforms and
# this scaler, with the feature columns in the same order as X.
with open("Classification_Model.pkl", "wb") as f:
    pickle.dump(model_clf, f)
with open("scaler_clf.pkl", "wb") as f:
    pickle.dump(scaler_clf, f)


  df = pd.read_csv("models/Copper_Set.csv")
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[[ 3592  3363]
 [  223 22911]]
              precision    recall  f1-score   support

           0       0.94      0.52      0.67      6955
           1       0.87      0.99      0.93     23134

    accuracy                           0.88     30089
   macro avg       0.91      0.75      0.80     30089
weighted avg       0.89      0.88      0.87     30089

