In [3]:
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import joblib
import numpy as np

In [4]:
# Hand-written example profiles, laid out as a compact table (one tuple per
# student) and expanded below into a list of dicts keyed by column name.
_COLUMNS = ("department", "cgpa", "interests", "suggested_career")

_ROWS = [
    ("Computer Engineering", 4.3, "machine learning, ai, data, coding", "Data Scientist"),
    ("Mechanical Engineering", 3.2, "design, 3D modelling, thermodynamics", "CAD Designer"),
    ("Electrical Engineering", 4.0, "circuits, power systems, renewable energy", "Power Systems Engineer"),
    ("Systems Engineering", 3.6, "python, automation, optimization, ai", "AI Engineer"),
    ("Computer Science", 3.9, "web development, ui, frontend, design", "Frontend Developer"),
    ("Civil Engineering", 3.4, "buildings, design, architecture, drawing", "Structural Engineer"),
    ("Computer Engineering", 2.8, "hardware, low level, embedded systems", "Embedded Systems Engineer"),
    ("Electrical Engineering", 3.1, "networking, routers, cisco, protocols", "Network Engineer"),
    ("Systems Engineering", 4.5, "data analytics, big data, statistics", "Data Analyst"),
    ("Mechanical Engineering", 2.7, "vehicles, engines, manufacturing", "Automobile Engineer"),
]

# Each record keeps the key order department -> cgpa -> interests -> suggested_career.
data = [dict(zip(_COLUMNS, row)) for row in _ROWS]


In [5]:
# Turn the list of record dicts into a table: one row per record,
# one column per dict key.
df = pd.DataFrame(data=data)

In [6]:
# Preview the first five rows as the cell's rich output.
df.head(n=5)

Unnamed: 0,department,cgpa,interests,suggested_career
0,Computer Engineering,4.3,"machine learning, ai, data, coding",Data Scientist
1,Mechanical Engineering,3.2,"design, 3D modelling, thermodynamics",CAD Designer
2,Electrical Engineering,4.0,"circuits, power systems, renewable energy",Power Systems Engineer
3,Systems Engineering,3.6,"python, automation, optimization, ai",AI Engineer
4,Computer Science,3.9,"web development, ui, frontend, design",Frontend Developer


In [7]:
# Model inputs: one numeric column, one free-text column, one categorical column.
features = ['cgpa', 'interests', 'department']
# Name the label column explicitly rather than treating "everything that is not
# a feature" as the target; otherwise any extra column added to `df` would
# silently be mixed into the labels.
target = 'suggested_career'

X = df[features]
# 1-D Series of labels, the shape scikit-learn estimators expect.
# `y.values.ravel()` still works on a Series, so flattening code keeps running.
y = df[target]

In [15]:
# Per-column preprocessing; the transformed blocks are concatenated into a
# single feature matrix by ColumnTransformer.
preprocessing = ColumnTransformer(
    transformers=[
        # Numeric GPA: standardise to zero mean / unit variance.
        # List selector -> the transformer receives a 2-D frame.
        ('num', StandardScaler(), ['cgpa']),
        # Free-text interests: TF-IDF bag of words.
        # Scalar (string) selector on purpose: TfidfVectorizer needs a 1-D
        # sequence of documents, not a 2-D column frame.
        ('txt', TfidfVectorizer(), 'interests'),
        # Department: one-hot encode. The default handle_unknown='error' would
        # make predict() raise for any department not present in the training
        # data; 'ignore' encodes such a value as an all-zero row instead.
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['department']),
    ]
)


In [16]:
# Chain the column preprocessing with a logistic-regression classifier
# (library-default hyperparameters) into one estimator.
classifier = LogisticRegression()
model = make_pipeline(preprocessing, classifier)

In [18]:
# Fit preprocessing + classifier in one call; the labels are flattened to a
# 1-D array before being handed to the estimator.
model.fit(X, np.ravel(y))

In [19]:
# Smoke test: score one hand-written profile with the fitted pipeline.
sample_profile = {
    "cgpa": 3.7,
    "interests": "data, machine learning",
    "department": "Systems Engineering",
}
# The pipeline selects columns by name, so the single record is wrapped in a
# one-row DataFrame carrying the same column names as the training features.
test_data = pd.DataFrame([sample_profile])

prediction = model.predict(test_data)
print("Suggested career:", prediction[0])

Suggested career: AI Engineer


In [17]:
# Persist the whole pipeline (preprocessing + classifier) to disk.
# NOTE(review): this cell's recorded execution count (17) is lower than the
# fit cell's (18), so the existing file may hold an unfitted pipeline;
# re-run this cell after `model.fit(...)`.
MODEL_PATH = 'model.pkl'
joblib.dump(model, MODEL_PATH)

['model.pkl']