In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

# Read the degree dataset from the CSV file next to the notebook.
data = pd.read_csv("degree.csv")

# Separate the three predictor columns from the target label, then hold out
# 20% of the rows for evaluation. The fixed random_state keeps the split
# identical from run to run.
X = data.loc[:, ['educational_factor', 'social_factor', 'stream']]
y = data.loc[:, 'degree']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Numeric factors are rescaled with MinMaxScaler (default range [0, 1]) so
# that neither column dominates the distance computation of the k-NN model.
numerical_features = ['educational_factor', 'social_factor']
numerical_transformer = MinMaxScaler()

# The categorical stream column is one-hot encoded into a dense array.
# handle_unknown='ignore' means a stream value that was not present when the
# encoder was fitted is encoded as all zeros instead of raising an error.
categorical_features = ['stream']
try:
    # scikit-learn >= 1.2 names the dense-output switch `sparse_output`;
    # the older `sparse` keyword was removed in 1.4.
    categorical_transformer = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    # Older scikit-learn releases only understand the legacy `sparse` keyword.
    categorical_transformer = OneHotEncoder(sparse=False, handle_unknown='ignore')

# Combine the numerical and categorical preprocessing steps into a single
# transformer that is fitted once and reused for every later transform.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Learn the scaling ranges and stream categories from the training rows only,
# then apply that same fitted transformation to the held-out rows.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# k-nearest-neighbours classifier with scikit-learn's default settings;
# fit() returns the estimator itself, so it can be bound in one statement.
model = KNeighborsClassifier().fit(X_train_processed, y_train)

# For the held-out rows, keep both the per-class probabilities and the
# single predicted label.
y_pred_prob = model.predict_proba(X_test_processed)
y_pred = model.predict(X_test_processed)

# One row per test sample: the predicted degree and the highest class
# probability reported for that sample (a fraction, not a 0-100 percentage).
predictions = pd.DataFrame(
    {
        'Degree': y_pred,
        'Percentage': y_pred_prob.max(axis=1),
    }
)

# Average that top probability per predicted degree and list the degrees
# from most to least confident.
recommendations = (
    predictions
    .groupby('Degree')['Percentage']
    .mean()
    .sort_values(ascending=False)
)
print(recommendations)

# Share of test samples whose predicted degree matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Degree
Bachelor of Software Engineering Honours                                           1.0000000000
Bachelor of Technology (BTech) Honours in Agriculture and Plantation Engineering   1.0000000000
Bachelor of Technology - Computer Engineering                                      1.0000000000
Bachelor of Technology - Mechanical Engineering                                    1.0000000000
Bachelor of Technology - Mechatronics Engineering                                  1.0000000000
Bachelor of Technology - Electrical Engineering                                    0.9333333333
Bachelor of Industrial Studies Honours – Textile Manufacture Specialization        0.9142857143
Bachelor of Industrial Studies Honours - Agriculture                               0.9000000000
Bachelor of Industrial Studies Honours – Fashion Design and Product Development    0.8857142857
Bachelor of Technology - Electronic and Communication Engineering                  0.8800000000
Bachelor of Technology - Civil En



In [None]:
# Input data for prediction: a single row with the same three feature
# columns the preprocessor was fitted on.
input_data = pd.DataFrame({
    'educational_factor': [2],
    'social_factor': [3],
    'stream': ['MathematicsD with IT']
})

# The encoder was created with handle_unknown='ignore', so a stream it did not
# see during fitting (for example a misspelling) is silently encoded as all
# zeros and the prediction then rests on the numeric factors alone. Report
# that case instead of letting it pass unnoticed.
known_streams = set(preprocessor.named_transformers_['cat'].categories_[0])
unseen_streams = [s for s in input_data['stream'].unique() if s not in known_streams]
if unseen_streams:
    print(f"Warning: stream value(s) not seen during training: {unseen_streams}")

# Apply the already-fitted preprocessing to the input data
input_processed = preprocessor.transform(input_data)

# Class probabilities, one row per input row.
# NOTE: this rebinds `predictions`, which the training cell used for a
# DataFrame of test-set results; re-run that cell if the DataFrame is needed.
predictions = model.predict_proba(input_processed)

# Pair every class label with its probability for the first (only) input row.
# Built as a list so the pairs remain available after the loop below; a bare
# zip object would be exhausted by it.
degrees = model.classes_
degree_probs = list(zip(degrees, predictions[0]))

# Print the predicted probabilities for each degree
print("Predicted Probabilities:")
for degree, prob in degree_probs:
    print(f"Degree: {degree}, Probability: {prob:.4f}")

Predicted Probabilities:
Degree: Bachelor of Industrial Studies Honours - Agriculture, Probability: 0.0000
Degree: Bachelor of Industrial Studies Honours – Apparel Production and Management, Probability: 0.2000
Degree: Bachelor of Industrial Studies Honours – Fashion Design and Product Development, Probability: 0.0000
Degree: Bachelor of Industrial Studies Honours – Textile Manufacture Specialization, Probability: 0.0000
Degree: Bachelor of Software Engineering Honours, Probability: 0.0000
Degree: Bachelor of Technology (BTech) Honours in Agriculture and Plantation Engineering, Probability: 0.2000
Degree: Bachelor of Technology - Civil Engineering, Probability: 0.0000
Degree: Bachelor of Technology - Computer Engineering, Probability: 0.0000
Degree: Bachelor of Technology - Electrical Engineering, Probability: 0.0000
Degree: Bachelor of Technology - Electronic and Communication Engineering, Probability: 0.2000
Degree: Bachelor of Technology - Mechanical Engineering, Probability: 0.2000

In [None]:
import pickle

# Save the trained model as a pickle file
with open('model.pkl', 'wb') as file:
    pickle.dump(model, file)

# The classifier was trained on preprocessed features, so it can only score
# raw inputs in another session if the fitted preprocessor (scaling ranges and
# stream categories) is available too. Persist it alongside the model.
# Only unpickle these files from a trusted location: loading a pickle can
# execute arbitrary code.
with open('preprocessor.pkl', 'wb') as file:
    pickle.dump(preprocessor, file)