In [17]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report


In [18]:
# Load the dataset from the Kaggle input directory and preview it.
# The `with` block closes the file handle as soon as pandas has finished
# parsing, instead of leaving it open for the lifetime of the kernel.
with open("/kaggle/input/123pilay/dataset9000.csv", "r") as file:
    df = pd.read_csv(file)

# Bare last expression: the notebook renders the frame as the cell output.
df

Unnamed: 0,Database Fundamentals,Computer Architecture,Distributed Computing Systems,Cyber Security,Networking,Software Development,Programming Skills,Project Management,Computer Forensics Fundamentals,Technical Communication,AI ML,Software Engineering,Business Analysis,Communication skills,Data Science,Troubleshooting skills,Graphics Designing,Role
0,Professional,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Database Administrator
1,Professional,Poor,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Database Administrator
2,Professional,Beginner,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Database Administrator
3,Professional,Average,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Database Administrator
4,Professional,Intermediate,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Database Administrator
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9174,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Poor,Poor,Professional,Graphics Designer
9175,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Beginner,Beginner,Professional,Graphics Designer
9176,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Average,Average,Professional,Graphics Designer
9177,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Not Interested,Intermediate,Intermediate,Professional,Graphics Designer


In [19]:
# Data cleaning, step 1: report, per column, whether it holds any missing value.
# The resulting boolean Series (True = column has at least one null) is the cell output.
df.isna().any()

Database Fundamentals              False
Computer Architecture              False
Distributed Computing Systems      False
Cyber Security                     False
Networking                         False
Software Development               False
Programming Skills                 False
Project Management                 False
Computer Forensics Fundamentals    False
Technical Communication            False
AI ML                              False
Software Engineering               False
Business Analysis                  False
Communication skills               False
Data Science                       False
Troubleshooting skills             False
Graphics Designing                  True
Role                               False
dtype: bool

In [20]:
# Remove the rows that have no 'Graphics Designing' rating. Only that column
# is inspected, so missing values in any other column would not be dropped.
# Reassigning (rather than inplace=True) avoids mutating the frame object
# that earlier cells displayed and keeps the step a plain expression.
df = df.dropna(subset=["Graphics Designing"])

# Re-run the per-column null check to confirm the column is now clean.
df.isnull().any()

Database Fundamentals              False
Computer Architecture              False
Distributed Computing Systems      False
Cyber Security                     False
Networking                         False
Software Development               False
Programming Skills                 False
Project Management                 False
Computer Forensics Fundamentals    False
Technical Communication            False
AI ML                              False
Software Engineering               False
Business Analysis                  False
Communication skills               False
Data Science                       False
Troubleshooting skills             False
Graphics Designing                 False
Role                               False
dtype: bool

In [21]:
# Encode the textual skill levels as ordered integers (1 = lowest, 7 = highest)
# so the classifier can work with them.
category_mapping = {
    "Not Interested": 1,
    "Poor": 2,
    "Beginner": 3,
    "Average": 4,
    "Intermediate": 5,
    "Excellent": 6,
    "Professional": 7
}

# Replace every cell that exactly matches a mapping key, in any column;
# values that are not keys of the mapping are left untouched.
# Reassigning instead of using inplace=True avoids mutating the frame in place
# and the cell stays safe to re-run (already-encoded integers match no key).
df = df.replace(category_mapping)

In [22]:
# Separate the target column from the predictors:
#   Y -> the "Role" label of every row
#   X -> every remaining column
Y = df["Role"]
X = df.drop(columns=["Role"])

In [23]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

In [31]:
# Create a decision tree classifier that chooses splits by information gain
# (entropy). scikit-learn's tree builder uses randomness when building the
# tree, so a fixed random_state (the same seed as the train/test split) keeps
# the fitted tree, and therefore its predictions, reproducible across runs.
DTClassifier = DecisionTreeClassifier(criterion="entropy", random_state=42)

In [32]:
# Train the decision tree on the training portion of the data.
# fit() is the last expression, so its return value is shown as the cell output.
DTClassifier.fit(X=X_train, y=Y_train)

In [26]:
# Predict a role for every row of the held-out test features.
predictions = DTClassifier.predict(X=X_test)

In [27]:
# Score the predictions against the true test labels:
#   accuracy                     -> overall accuracy score
#   classification_report_result -> per-class precision / recall / f1 text report
accuracy = accuracy_score(y_true=Y_test, y_pred=predictions)
classification_report_result = classification_report(
    y_true=Y_test,
    y_pred=predictions,
)

In [34]:
# Show the evaluation results: the accuracy on one line, then the full report.
print(
    f'Accuracy: {accuracy}',
    f'Classification Report:\n{classification_report_result}',
    sep="\n",
)

Accuracy: 1.0
Classification Report:
                                 precision    recall  f1-score   support

               AI ML Specialist       1.00      1.00      1.00       123
                 API Specialist       1.00      1.00      1.00       118
   Application Support Engineer       1.00      1.00      1.00       147
               Business Analyst       1.00      1.00      1.00       131
     Customer Service Executive       1.00      1.00      1.00       129
      Cyber Security Specialist       1.00      1.00      1.00       123
                 Data Scientist       1.00      1.00      1.00       124
         Database Administrator       1.00      1.00      1.00       147
              Graphics Designer       1.00      1.00      1.00       141
              Hardware Engineer       1.00      1.00      1.00       130
              Helpdesk Engineer       1.00      1.00      1.00       154
Information Security Specialist       1.00      1.00      1.00       151
            N

In [37]:
# Predict the role for one hand-written example profile.

# One rating per feature, written in the same textual levels as the dataset.
sample_ratings = {
    "Database Fundamentals": "Poor",
    "Computer Architecture": "Poor",
    "Distributed Computing Systems": "Poor",
    "Cyber Security": "Poor",
    "Networking": "Poor",
    "Software Development": "Professional",
    "Programming Skills": "Excellent",
    "Project Management": "Average",
    "Computer Forensics Fundamentals": "Poor",
    "Technical Communication": "Poor",
    "AI ML": "Poor",
    "Software Engineering": "Average",
    "Business Analysis": "Poor",
    "Communication skills": "Average",
    "Data Science": "Average",
    "Troubleshooting skills": "Poor",
    "Graphics Designing": "Poor",
}

# Build a single-row frame and encode the levels with the same
# category_mapping that was applied to the training data.
new_data = pd.DataFrame([sample_ratings]).replace(category_mapping)

prediction = DTClassifier.predict(new_data)
print(f'Predicted Role: {prediction}')



Predicted Role: ['Software Developer']


In [40]:
# Ask the user to rate themselves on every feature, then predict a role.

# Features to ask about, in the order the prompts are shown.
columns = ["Database Fundamentals", "Computer Architecture", "Distributed Computing Systems",
           "Cyber Security", "Networking", "Software Development", "Programming Skills",
           "Project Management", "Computer Forensics Fundamentals", "Technical Communication",
           "AI ML", "Software Engineering", "Business Analysis", "Communication skills",
           "Data Science", "Troubleshooting skills", "Graphics Designing"]

# Lookup from lower-cased level name to its canonical spelling, so that
# answers such as "poor" or " Poor " are accepted.
valid_levels = {level.lower(): level for level in category_mapping}

# Collect one validated rating per feature. An unrecognised answer would pass
# through `replace` unchanged and make `predict` fail on a raw string, so the
# question is repeated until the answer is one of the known levels.
user_ratings = {}
for feature in columns:
    while True:
        user_input = input(f"Enter your assessment for {feature} (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner) ").strip()
        level = valid_levels.get(user_input.lower())
        if level is not None:
            break
        print(f"'{user_input}' is not a recognised level. "
              f"Please enter one of: {', '.join(category_mapping)}")
    user_ratings[feature] = [level]

# Build a single-row frame from the answers and encode the levels with the
# same category_mapping that was applied to the training data.
user_data = pd.DataFrame(user_ratings).replace(category_mapping)

# DTClassifier must already be fitted by the earlier training cell.
prediction = DTClassifier.predict(user_data)
print(f'Predicted Role: {prediction}')

Enter your assessment for Database Fundamentals (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner):  Poor
Enter your assessment for Computer Architecture (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner):  Poor
Enter your assessment for Distributed Computing Systems (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner):  Poor
Enter your assessment for Cyber Security (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner):  Poor
Enter your assessment for Networking (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner):  Poor
Enter your assessment for Software Development (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner):  Average
Enter your assessment for Programming Skills (e.g., Poor, Average, Excellent,Not Interested,Professional, Intermediate,Beginner):  Average
Enter your assessment for Project M

Predicted Role: ['Helpdesk Engineer']
