In [1]:
import os
import pandas as pd
import numpy as np
import joblib  # To save models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer



# Load the training data from the current working directory
train_df = pd.read_csv("train.csv")

# Drop 'id' as it's not a feature
train_df.drop(columns=['id'], inplace=True)

# Handle missing values by forward-filling each column from the previous row.
# DataFrame.ffill() is used instead of fillna(method='ffill'): the `method`
# argument of fillna is deprecated in pandas and emits a FutureWarning.
# NOTE: forward fill cannot fill a gap in the very first row(s), since there
# is no earlier value to propagate; any such leftovers remain NaN here.
train_df.ffill(inplace=True)

# Column roles: the listed columns are categorical; every remaining column
# except the 'Price' target is treated as numerical
categorical_cols = ['Brand', 'Material', 'Size', 'Style', 'Color', 'Laptop Compartment', 'Waterproof']
non_numerical = set(categorical_cols) | {'Price'}
numerical_cols = [column for column in train_df.columns if column not in non_numerical]

# One LabelEncoder per categorical column, kept in a dict for later reuse
label_encoders = {col: LabelEncoder() for col in categorical_cols}

# Fit each encoder on the string form of its column and replace the column
# with the resulting integer codes
for col, le in label_encoders.items():
    train_df[col] = le.fit_transform(train_df[col].astype(str))

# Separate the 'Price' target from the feature columns
y = train_df['Price']
X = train_df.drop(columns=['Price'])

# Hold out 20% of the rows for validation (fixed seed for repeatable splits)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Mean-impute numerical features; the means are learned from the training
# split only and then applied to both splits
imputer = SimpleImputer(strategy='mean').fit(X_train[numerical_cols])
for split in (X_train, X_val):
    split[numerical_cols] = imputer.transform(split[numerical_cols])

# Standardize numerical features with a scaler fitted on the (already imputed)
# training split, then apply it to both splits
scaler = StandardScaler().fit(X_train[numerical_cols])
for split in (X_train, X_val):
    split[numerical_cols] = scaler.transform(split[numerical_cols])

# Registry of regressors to train, keyed by the display name that is also used
# as the saved file name
models = {}
models["SVM"] = SVR(kernel='rbf')
models["Neural Network"] = MLPRegressor(
    hidden_layer_sizes=(100,50),
    max_iter=500,
    random_state=42,
)
models["Decision Tree"] = DecisionTreeRegressor(random_state=42)
models["Random Forest"] = RandomForestRegressor(n_estimators=100, random_state=42)
models["Gradient Boosting"] = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Make sure the output directory exists before anything is written to it
os.makedirs("models", exist_ok=True)

# Fit every model on the training split and persist it right away
# (fit() returns the fitted estimator itself, so it can be dumped directly)
for name, model in models.items():
    joblib.dump(model.fit(X_train, y_train), f"models/{name}.pkl")

# Persist the fitted preprocessing objects alongside the models
preprocessing_artifacts = (
    (label_encoders, "models/label_encoders.pkl"),
    (imputer, "models/imputer.pkl"),
    (scaler, "models/scaler.pkl"),
)
for artifact, artifact_path in preprocessing_artifacts:
    joblib.dump(artifact, artifact_path)

print("Models and preprocessing tools saved!")


  train_df.fillna(method='ffill', inplace=True)


Models and preprocessing tools saved!
