In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [4]:
df = pd.read_csv("careermitra_dataset.csv")

In [15]:
def map_job_roles(role):
    if 'Developer' in role or 'Engineer' in role:
        return 'Developer'
    elif 'Analyst' in role:
        return 'Analyst'
    elif 'Manager' in role:
        return 'Management'
    elif 'Support' in role or 'Help Desk' in role:
        return 'Support'
    elif 'QA' in role or 'Testing' in role:
        return 'Testing'
    elif 'Design' in role or 'UX' in role:
        return 'Design'
    elif 'Admin' in role:
        return 'Administrator'
    else:
        return 'Others'

df['JobCategory'] = df['Suggested Job Role'].apply(map_job_roles)

In [16]:
# Select relevant features and label
features = [
    'Acedamic percentage in Operating Systems',
    'percentage in Algorithms',
    'Percentage in Programming Concepts',
    'Percentage in Software Engineering',
    'Percentage in Computer Networks',
    'Percentage in Electronics Subjects',
    'Percentage in Computer Architecture',
    'Percentage in Mathematics',
    'Percentage in Communication skills'
]
X = df[features]
y = df['JobCategory']

In [17]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [19]:
# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train_scaled, y_train)

In [None]:
y_pred = rf.predict(X_test_scaled)

In [12]:
print("Accuracy Score:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))

Accuracy Score: 0.03425

Classification Report:
                                            precision    recall  f1-score   support

                   Applications Developer       0.02      0.02      0.02       115
            Business Intelligence Analyst       0.00      0.00      0.00        97
                 Business Systems Analyst       0.04      0.04      0.04       123
                     CRM Business Analyst       0.01      0.02      0.02       104
                  CRM Technical Developer       0.00      0.00      0.00       100
                           Data Architect       0.02      0.02      0.02       115
                   Database Administrator       0.05      0.05      0.05       122
                       Database Developer       0.01      0.01      0.01       112
                         Database Manager       0.05      0.04      0.05       122
                              Design & UX       0.01      0.01      0.01       122
                       E-Commerce Ana

In [14]:
print(pd.Series(y).value_counts())

Suggested Job Role
Network Security Administrator               1112
Network Security Engineer                     630
Network Engineer                              621
Project Manager                               602
Database Administrator                        593
Portal Administrator                          593
Information Technology Manager                591
Software Engineer                             590
UX Designer                                   589
Design & UX                                   588
Software Developer                            587
CRM Business Analyst                          584
Business Systems Analyst                      582
Database Developer                            581
Solutions Architect                           578
Software Systems Engineer                     575
Software Quality Assurance (QA) / Testing     571
Web Developer                                 570
Database Manager                              570
CRM Technical Developer        