# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

def _encode_labels(frame, columns, mapping):
    """Replace string labels in ``columns`` of ``frame`` with integer codes, in place.

    ``Series.map`` turns every value that is missing from ``mapping`` into NaN
    without any warning, so a stray "Yes" or "no " would silently become a
    missing feature. Labels are therefore stripped and lower-cased before the
    lookup, and any non-null value that still has no code raises instead of
    being dropped. Values that are already null stay null.

    Args:
        frame: DataFrame whose columns are overwritten.
        columns: Names of the columns to encode.
        mapping: Dict from lower-case label to integer code.

    Raises:
        ValueError: If a column holds a non-null label absent from ``mapping``.
    """
    for name in columns:
        raw = frame[name]
        # Normalise only the non-null entries; astype(str) alone would turn
        # NaN into the literal string "nan".
        normalized = raw.where(raw.isna(), raw.astype(str).str.strip().str.lower())
        encoded = normalized.map(mapping)
        unknown = raw[encoded.isna() & raw.notna()].unique()
        if len(unknown) > 0:
            raise ValueError(
                f"Column {name!r} has labels with no encoding: {list(unknown)!r}"
            )
        frame[name] = encoded


# Load the dataset
df = pd.read_csv("/content/mldata.csv")

# Number Encoding: yes/no answers become 1/0.
cols_to_encode = ["self-learning capability?", "Extra-courses did", "Taken inputs from seniors or elders",
                  "worked in teams ever?", "Introvert"]
_encode_labels(df, cols_to_encode, {"yes": 1, "no": 0})

# Ordinal Encoding: the three levels keep their order as 0 < 1 < 2.
ordinal_cols = ["reading and writing skills", "memory capability score"]
ordinal_mapping = {"poor": 0, "medium": 1, "excellent": 2}
_encode_labels(df, ordinal_cols, ordinal_mapping)

# One-hot Encoding
# Each listed categorical column is replaced by one indicator column per
# distinct value; the original column name is used as the prefix of the new
# column names.
category_cols = [
    'certifications',
    'workshops',
    'Interested subjects',
    'interested career area ',  # the literal ends with a space; presumably matches the CSV header - confirm
    'Type of company want to settle in?',
    'Interested Type of Books',
]
df = pd.get_dummies(
    df,
    columns=category_cols,
    prefix=category_cols,
)

# Feature selection
# The first four columns are used as read from the CSV; the remaining seven
# are the yes/no and poor/medium/excellent columns encoded earlier.
# NOTE(review): none of the indicator columns created by pd.get_dummies are
# listed here, so they are never passed to the model - confirm this is intended.
features = [
    'Logical quotient rating',
    'coding skills rating',
    'hackathons',
    'public speaking points',
    'self-learning capability?',
    'Extra-courses did',
    'Taken inputs from seniors or elders',
    'worked in teams ever?',
    'Introvert',
    'reading and writing skills',
    'memory capability score',
]

# Train-test split
# Copies keep X and y independent of later changes to df. 20% of the rows are
# held out for evaluation, with a fixed seed so the split is repeatable.
X = df.loc[:, features].copy()
y = df.loc[:, 'Suggested Job Role'].copy()
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train Random Forest Classifier
# random_state is pinned to the same seed as the train/test split so that the
# fitted forest, the pickled model and the reported accuracy are reproducible
# from run to run; without it each run draws different bootstrap samples and
# feature subsets.
clf = RandomForestClassifier(n_estimators=500, random_state=42)
clf.fit(x_train, y_train)

# Save the trained model
# The fitted classifier is serialised to disk with pickle.
with open('/content/model.pkl', 'wb') as model_out:
    pickle.dump(clf, model_out)

# Load the trained model
# Reading the file straight back rebinds clf to the deserialised copy, so the
# evaluation below runs on the model exactly as it was stored.
with open('/content/model.pkl', 'rb') as model_in:
    clf = pickle.load(model_in)

# Make predictions
# Predict a job role for every row of the held-out test set.
y_pred = clf.predict(x_test)

# Evaluate the model
# Accuracy is the fraction of test rows whose predicted label equals the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Random Forest Classifier Accuracy:", accuracy)