Model training: Logistic Regression (balanced class weights)

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

# ---------------------------------------------------------------------------
# Class-weighted logistic regression: load -> clean -> split -> scale ->
# fit -> persist -> evaluate. The cell is self-contained and rebuilds every
# variable it uses from the CSV on disk.
# ---------------------------------------------------------------------------

# Load: read the cleaned product table from the working directory.
df = pd.read_csv("products_clean_final.csv")
print(" Dataset loaded:", df.shape)

# Clean: discard any row that contains at least one missing value.
df = df.dropna(axis=0, how="any")
print(" Rows with NaNs removed. Remaining shape:", df.shape)

# Target / features: 'condition' is the label (0 = new, 1 = used);
# every remaining column is used as a feature.
y = df['condition']
X = df.drop(columns='condition')

# Split: hold out 20% for testing, stratified on the label so both
# partitions keep the same class proportions; seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Scale: learn the standardisation statistics on the training partition only,
# then apply that same transform to both partitions (no test-set leakage).
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit: L2-penalised logistic regression; 'balanced' re-weights the classes
# inversely to their frequency in the training labels.
model = LogisticRegression(
    penalty='l2',
    solver='lbfgs',
    class_weight='balanced',
    max_iter=1000,
    random_state=42,
)
model.fit(X_train_scaled, y_train)
print(" Logistic Regression model trained.")

# Persist: the model was trained on scaled inputs, so the fitted scaler is
# saved next to it and must be applied to any data before prediction.
joblib.dump(model, "logreg_product_condition_model.pkl")
joblib.dump(scaler, "logreg_scaler.pkl")
print(" Logistic model and scaler saved as 'logreg_*.pkl'")

# Evaluate: score the held-out partition and build the per-class report.
y_pred = model.predict(X_test_scaled)
report = classification_report(y_test, y_pred, target_names=["New", "Used"])
print("\n Classification Report:\n")
print(report)

# Archive: keep a plain-text copy of the report on disk.
with open("logreg_classification_report.txt", mode="w") as report_file:
    report_file.write(report)
print(" Report saved to 'logreg_classification_report.txt'")


 Dataset loaded: (100000, 19)
 Rows with NaNs removed. Remaining shape: (95398, 19)
 Logistic Regression model trained.
 Logistic model and scaler saved as 'logreg_*.pkl'

 Classification Report:

              precision    recall  f1-score   support

         New       0.83      0.52      0.64     10432
        Used       0.60      0.87      0.71      8648

    accuracy                           0.68     19080
   macro avg       0.71      0.69      0.67     19080
weighted avg       0.72      0.68      0.67     19080

 Report saved to 'logreg_classification_report.txt'


Model training: Logistic Regression (baseline, without class weighting)

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

# ---------------------------------------------------------------------------
# Baseline logistic regression (no class weighting): load -> clean -> split ->
# scale -> fit -> persist -> evaluate. The cell is self-contained and rebuilds
# every variable it uses from the CSV on disk.
# ---------------------------------------------------------------------------

# Load: read the cleaned product table from the working directory.
df = pd.read_csv("products_clean_final.csv")
print(" Dataset loaded:", df.shape)

# Clean: discard any row with a missing value, since the scaler and the
# estimator below cannot be fitted on NaNs.
df = df.dropna(axis=0, how="any")
print(" Rows with NaNs removed. Remaining shape:", df.shape)

# Target / features: 'condition' is the label (0 = new, 1 = used);
# every remaining column is used as a feature.
y = df['condition']
X = df.drop(columns='condition')

# Split: hold out 20% for testing, stratified on the label so both
# partitions keep the same class proportions; seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Scale: learn the standardisation statistics on the training partition only,
# then apply that same transform to both partitions (no test-set leakage).
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit: L2-penalised logistic regression with the lbfgs solver. No
# class_weight is passed, so both classes contribute with equal weight.
model = LogisticRegression(
    penalty='l2',
    solver='lbfgs',
    max_iter=1000,
    random_state=42,
)
model.fit(X_train_scaled, y_train)
print(" Model trained successfully.")

# Persist: the model was trained on scaled inputs, so the fitted scaler is
# saved next to it and must be applied to any data before prediction.
joblib.dump(model, "product_condition_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print(" Model and scaler saved.")

# Evaluate: score the held-out partition and build the per-class report.
y_pred = model.predict(X_test_scaled)
report = classification_report(y_test, y_pred, target_names=["New", "Used"])

# Show the metrics in the cell output.
print("\n Classification Report:\n")
print(report)

# Archive: keep a plain-text copy of the report on disk.
with open("classification_report.txt", mode="w") as report_file:
    report_file.write(report)
print(" Report saved to classification_report.txt")


 Dataset loaded: (100000, 19)
 Rows with NaNs removed. Remaining shape: (95398, 19)
 Model trained successfully.
 Model and scaler saved.

 Classification Report:

              precision    recall  f1-score   support

         New       0.77      0.63      0.69     10432
        Used       0.63      0.77      0.69      8648

    accuracy                           0.69     19080
   macro avg       0.70      0.70      0.69     19080
weighted avg       0.71      0.69      0.69     19080

 Report saved to classification_report.txt


Model training: Gradient Boosting Classifier

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
import joblib

# ---------------------------------------------------------------------------
# Gradient boosting classifier: load -> clean -> split -> scale -> fit ->
# persist -> evaluate. The cell is self-contained and rebuilds every variable
# it uses from the CSV on disk.
# ---------------------------------------------------------------------------

# Load: read the cleaned product table from the working directory.
df = pd.read_csv("products_clean_final.csv")
print(" Dataset loaded:", df.shape)

# Clean: discard any row that contains at least one missing value.
df = df.dropna(axis=0, how="any")
print(" Rows with NaNs removed. Remaining shape:", df.shape)

# Target / features: 'condition' is the label (0 = new, 1 = used);
# every remaining column is used as a feature.
y = df['condition']
X = df.drop(columns='condition')

# Split: hold out 20% for testing, stratified on the label so both
# partitions keep the same class proportions; seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Scale: learn the standardisation statistics on the training partition only,
# then apply that same transform to both partitions (no test-set leakage).
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit: 100 boosting stages of depth-5 trees with a 0.1 learning rate;
# seeded so repeated runs build the same ensemble.
model = GradientBoostingClassifier(
    max_depth=5,
    learning_rate=0.1,
    n_estimators=100,
    random_state=42,
)
model.fit(X_train_scaled, y_train)
print(" Gradient Boosting model trained.")

# Persist: the model was trained on scaled inputs, so the fitted scaler is
# saved next to it and must be applied to any data before prediction.
joblib.dump(model, "gb_product_condition_model.pkl")
joblib.dump(scaler, "gb_scaler.pkl")
print(" Gradient Boosting model and scaler saved.")

# Evaluate: score the held-out partition and build the per-class report.
y_pred = model.predict(X_test_scaled)
report = classification_report(y_test, y_pred, target_names=["New", "Used"])
print("\n Classification Report:\n")
print(report)

# Archive: keep a plain-text copy of the report on disk.
with open("gb_classification_report.txt", mode="w") as report_file:
    report_file.write(report)
print(" Report saved to 'gb_classification_report.txt'")


 Dataset loaded: (100000, 19)
 Rows with NaNs removed. Remaining shape: (95398, 19)
 Gradient Boosting model trained.
 Gradient Boosting model and scaler saved.

 Classification Report:

              precision    recall  f1-score   support

         New       0.88      0.77      0.82     10432
        Used       0.76      0.87      0.81      8648

    accuracy                           0.82     19080
   macro avg       0.82      0.82      0.82     19080
weighted avg       0.83      0.82      0.82     19080

 Report saved to 'gb_classification_report.txt'
