In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE  # For handling class imbalance

# Load dataset
file_path = "dataset.csv"
df = pd.read_csv(file_path)

# Define target variable
target_column = 'Suggested Job Role'

# Split dataset into features and target
X = df.drop(columns=[target_column])  # Features
y = df[target_column]  # Target labels

# Encode target labels as integers; the fitted encoder maps predictions back
# to the original role names.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train-Test Split comes FIRST, before any step that learns from the data.
# Scaling or resampling before the split leaks information from the test rows
# into training and makes the reported test metrics unreliable.
# stratify keeps the class proportions the same in both partitions.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Feature scaling (Standardize): fit on the training partition only, then
# apply the same transform to the test partition.
# NOTE(review): StandardScaler requires numeric input - assumes every feature
# column in dataset.csv is numeric; confirm.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Handle class imbalance using SMOTE on the TRAINING data only, so the test
# set contains only real, un-resampled rows.
# NOTE(review): SMOTE's default k_neighbors=5 needs at least 6 training rows
# per class; lower k_neighbors if a class is rarer than that.
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_train, y_train = smote.fit_resample(X_train_scaled, y_train_raw)

# Random Forest with fixed, hand-picked hyperparameters (no search is run
# here); random_state makes the fitted forest reproducible.
rf_model = RandomForestClassifier(n_estimators=500, max_depth=15, min_samples_split=5, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Predict on test data
y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Display classification report.
# labels is passed explicitly so the rows always line up with target_names;
# without it, classification_report raises a ValueError when some class is
# missing from both y_test and y_pred.
class_labels = np.arange(len(label_encoder.classes_))
print("\nClassification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=label_encoder.classes_,
    )
)

# Persist the fitted model, label encoder and scaler as pickle files, one file
# per object, written in this order.
artifacts = (
    ("model.pkl", rf_model),
    ("label_encoder.pkl", label_encoder),
    ("scaler.pkl", scaler),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, "wb") as out_file:
        pickle.dump(artifact, out_file)

Test Accuracy: 0.7029

Classification Report:
                       precision    recall  f1-score   support

     Business Analyst       0.76      0.67      0.71        51
       Cloud Engineer       0.85      0.83      0.84        47
Cybersecurity Analyst       0.56      0.44      0.49        55
         Data Analyst       0.84      0.84      0.84        61
        Data Engineer       0.35      0.33      0.34        51
          Design & UX       0.74      0.74      0.74        57
 Full Stack Developer       0.88      0.98      0.93        47
           IT Auditor       0.76      0.97      0.85        38
   IT Project Manager       0.76      0.71      0.73        52
IT Support Specialist       0.73      0.63      0.68        52
     Mobile Developer       0.88      0.92      0.90        49
          QA Engineer       0.76      0.71      0.74        49
    Software Engineer       0.22      0.28      0.25        46
 System Administrator       0.81      0.87      0.84        45

       