In [1]:
# train_and_export_model.py

import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import RandomOverSampler
from scipy import sparse

# Train an MLP job-role classifier on careermitra_dataset.csv and export the
# fitted model together with the preprocessing objects needed at inference
# time (one-hot encoder, scaler, label encoder) as pickle files.
#
# Ordering matters here: the train/test split happens BEFORE any fitting or
# resampling. Oversampling first would place copies of the same row in both
# partitions, and fitting the encoder/scaler on all rows would leak test-set
# statistics into preprocessing; either one makes the held-out score
# meaningless.

# Load the dataset
data = pd.read_csv("careermitra_dataset.csv")

# Separate features and target
X_raw = data.drop(columns=['Suggested Job Role'])
y_raw = data['Suggested Job Role']

# Object-dtype columns are one-hot encoded; every other column is passed
# through as a numeric feature.
categorical_cols = X_raw.select_dtypes(include='object').columns
numerical_cols = X_raw.select_dtypes(exclude='object').columns

# Encode target (a label mapping has no learned statistics, so fitting it on
# all rows does not leak information and guarantees every role has a code).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_raw)

# Split the raw rows first. Stratify so the imbalanced class distribution is
# preserved in both partitions; train_test_split raises ValueError when a
# class is too rare to stratify, in which case fall back to a plain split.
try:
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X_raw, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )
except ValueError:
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X_raw, y_encoded, test_size=0.2, random_state=42
    )

encoder = OneHotEncoder(sparse_output=True, handle_unknown='ignore')
# with_mean=False because centering would densify the sparse matrix.
scaler = StandardScaler(with_mean=False)


def _combine_features(frame, encoded_categoricals):
    """Stack numeric columns (first) and one-hot columns into one CSR matrix."""
    numeric_part = sparse.csr_matrix(frame[numerical_cols].to_numpy(dtype=float))
    return sparse.hstack([numeric_part, encoded_categoricals], format="csr")


# Fit the preprocessing on the training rows only ...
X_train_combined = _combine_features(
    X_train_raw, encoder.fit_transform(X_train_raw[categorical_cols])
)
X_train_scaled = scaler.fit_transform(X_train_combined)

# ... and merely apply it to the held-out rows. Categories unseen during
# fitting are encoded as all-zero thanks to handle_unknown='ignore'.
X_test_combined = _combine_features(
    X_test_raw, encoder.transform(X_test_raw[categorical_cols])
)
X_test = scaler.transform(X_test_combined)

# Balance the training data only; the test set keeps its natural distribution
# and contains no duplicates of training rows.
ros = RandomOverSampler(random_state=42)
X_train, y_train = ros.fit_resample(X_train_scaled, y_train)

# Train MLP Classifier
model = MLPClassifier(
    activation='tanh',
    hidden_layer_sizes=(250, 250, 250),
    solver='adam',
    max_iter=300,
    random_state=42
)
model.fit(X_train, y_train)

# Report accuracy on rows the model and the preprocessing have never seen.
print(f"Held-out accuracy: {model.score(X_test, y_test):.4f}")

# Save model and encoders
with open("mlp_model.pkl", "wb") as f:
    pickle.dump(model, f)
with open("encoder.pkl", "wb") as f:
    pickle.dump(encoder, f)
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

print("Model and encoders exported successfully.")


Model and encoders exported successfully.
