In [1]:
import pandas as pd

# Specify the path to your Excel file
file_path = 'Faculty_profile.csv'

# Load the data into a DataFrame
df = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to ensure it's loaded correctly

# Extract relevant columns
qual_design_df = df[['Qualification', 'Designation']]
print(qual_design_df)

        Qualification          Designation
0                 PhD            PROFESSOR
1         B.E  & M.E   ASSOCIATE PROFESSOR
2      B.E  & M.TECH   ASSOCIATE PROFESSOR
3      B.E  & M.TECH   ASSOCIATE PROFESSOR
4      B.E  & M.TECH   ASSISTANT PROFESSOR
5         B.E  & M.E   ASSISTANT PROFESSOR
6   B.TECH   & M.TECH  ASSISTANT PROFESSOR
7   B.TECH   & M.TECH  ASSISTANT PROFESSOR
8   B.TECH   & M.TECH  ASSISTANT PROFESSOR
9   B.TECH   & M.TECH  ASSISTANT PROFESSOR
10  B.TECH   & M.TECH  ASSISTANT PROFESSOR
11  B.TECH   & M.TECH  ASSISTANT PROFESSOR
12  B.TECH   & M.TECH  ASSISTANT PROFESSOR
13     B.E  & M.TECH   ASSISTANT PROFESSOR
14     B.E  & M.TECH   ASSISTANT PROFESSOR
15        B.E  & M.E   ASSISTANT PROFESSOR
16  B.TECH  & M.TECH   ASSISTANT PROFESSOR
17  B.TECH  & M.TECH   ASSISTANT PROFESSOR
18  B.TECH  & M.TECH   ASSISTANT PROFESSOR
19  B.TECH  & M.TECH   ASSISTANT PROFESSOR
20  B.TECH  & M.TECH   ASSISTANT PROFESSOR
21        B.E  & M.E   ASSISTANT PROFESSOR
22     B.E 

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score

# Load the dataset
df = qual_design_df

# Function to check if the designation matches the qualification
def check_designation_qualification(row):
    qualification = row['Qualification'].upper().strip()
    designation = row['Designation'].upper().strip()

    if 'PHD' in qualification:
        if designation in ['PROFESSOR', 'ASSOCIATE PROFESSOR', 'ASSISTANT PROFESSOR']:
            return 1  # Correct designation for PhD
        else:
            return 0  # Incorrect designation for PhD
    elif 'M.E' in qualification or 'M.TECH' in qualification:
        if designation in ['ASSOCIATE PROFESSOR', 'ASSISTANT PROFESSOR']:
            return 1  # Correct designation for M.E./M.Tech
        else:
            return 0  # Incorrect designation for M.E./M.Tech
    elif 'B.E' in qualification or 'B.TECH' in qualification:
        if designation == 'ASSISTANT PROFESSOR':
            return 1  # Correct designation for B.E./B.Tech
        else:
            return 0  # Incorrect designation for B.E./B.Tech
    else:
        return 0  # Incorrect qualification or unrecognized designation

# Apply the function to create a target column
df['Target'] = df.apply(check_designation_qualification, axis=1)

# Prepare features and target variable
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['Qualification'])
y = df['Target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate on the test set
y_pred = model.predict(X_test)

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Print confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Cross-validation scores
scores = cross_val_score(model, X, y, cv=5)  # 5-fold cross-validation
print("Cross-validation scores:", scores)
print("Mean cross-validation score:", scores.mean())

# Apply the model to the entire dataset
df['Predicted'] = model.predict(vectorizer.transform(df['Qualification']))

# Check the correctness of the predictions
df['Predicted_Status'] = df.apply(
    lambda row: 'correct' if row['Predicted'] == 1 and check_designation_qualification(row) == 1 else 'incorrect', axis=1)

# Display the results
print(df[['Qualification', 'Designation', 'Predicted_Status']])

Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         7

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7

Confusion Matrix:
[[7]]
Cross-validation scores: [1. 1. 1. 1. 1.]
Mean cross-validation score: 1.0
        Qualification          Designation Predicted_Status
0                 PhD            PROFESSOR          correct
1         B.E  & M.E   ASSOCIATE PROFESSOR          correct
2      B.E  & M.TECH   ASSOCIATE PROFESSOR          correct
3      B.E  & M.TECH   ASSOCIATE PROFESSOR          correct
4      B.E  & M.TECH   ASSISTANT PROFESSOR          correct
5         B.E  & M.E   ASSISTANT PROFESSOR          correct
6   B.TECH   & M.TECH  ASSISTANT PROFESSOR          correct
7   B.TECH   & M.TECH  ASSISTANT PROFESSOR          correct
8   B.TECH   & M.TECH  ASSISTANT PROFESSOR          correct
9   B