In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [2]:
import pandas as pd

survey =  pd.read_csv('Employee_Survey_data.csv')
utilization = pd.read_csv('Utilization_by_employee_Data.csv')
vendor = pd.read_csv('Vendor_options.csv')

train_data = utilization.merge(survey, 'left', on=['employee_id']).drop(columns=['employee_id'])

In [3]:
X = train_data[['memory', 'processing', 'storage', 'inverse_price']]
y = train_data['utilization_bin']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=40)

In [5]:
svm_model = SVC()

In [6]:
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.02, 0.001], 'kernel': ['rbf', 'linear', 'poly', 'sigmoid']}
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)

Fitting 5 folds for each of 64 candidates, totalling 320 fits


Best Parameters: {'C': 10, 'gamma': 1, 'kernel': 'rbf'}


In [7]:
best_svm_model = grid_search.best_estimator_
predictions = best_svm_model.predict(X_test)


print("Classification Report:")
print(classification_report(y_test, predictions))
print("Accuracy Score:", accuracy_score(y_test, predictions))

Classification Report:
              precision    recall  f1-score   support

        high       1.00      1.00      1.00         2
         low       1.00      1.00      1.00         4
      medium       1.00      1.00      1.00         9

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15

Accuracy Score: 1.0


In [8]:
vendor_scaled = vendor.apply(lambda col: col*10/100 if col.name not in ['computer_id'] else col)
X_pred = vendor_scaled[['memory', 'processing', 'storage', 'inverse_price']]
computer_id = vendor_scaled[['computer_id']]

In [9]:
import numpy as np


decision_scores = best_svm_model.decision_function(X_pred)


def softmax(x):
    exp_scores = np.exp(x - np.max(x))
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)


predicted_probabilities = softmax(decision_scores)
predicted_df = pd.DataFrame(predicted_probabilities, columns=['high', 'low', 'medium'])
output = pd.concat([predicted_df, computer_id], axis=1)
output

Unnamed: 0,high,low,medium,computer_id
0,0.057077,0.700024,0.242899,13
1,0.693874,0.083318,0.222807,16
2,0.072006,0.676651,0.251343,4
3,0.078604,0.671026,0.250369,1
4,0.729207,0.064008,0.206785,3
5,0.189732,0.077105,0.733163,2
6,0.20716,0.065516,0.727324,20
7,0.251502,0.679608,0.068891,8
8,0.729836,0.065235,0.204929,9
9,0.066184,0.698413,0.235403,7


In [10]:
def get_highest_value_label(row):
  max_value = max(row['high'], row['medium'], row['low'])
  

  if max_value == row['high']: return "High", max_value
  elif max_value == row['medium']: return "Medium", max_value
  else: return "Low", row['low']


output[['highest_label', 'max_prob']] = output.apply(get_highest_value_label, axis=1, result_type='expand')
output

Unnamed: 0,high,low,medium,computer_id,highest_label,max_prob
0,0.057077,0.700024,0.242899,13,Low,0.700024
1,0.693874,0.083318,0.222807,16,High,0.693874
2,0.072006,0.676651,0.251343,4,Low,0.676651
3,0.078604,0.671026,0.250369,1,Low,0.671026
4,0.729207,0.064008,0.206785,3,High,0.729207
5,0.189732,0.077105,0.733163,2,Medium,0.733163
6,0.20716,0.065516,0.727324,20,Medium,0.727324
7,0.251502,0.679608,0.068891,8,Low,0.679608
8,0.729836,0.065235,0.204929,9,High,0.729836
9,0.066184,0.698413,0.235403,7,Low,0.698413


In [11]:
ids = output.groupby('highest_label')['max_prob'].idxmax()
max_scores = output.loc[ids]
max_scores

Unnamed: 0,high,low,medium,computer_id,highest_label,max_prob
8,0.729836,0.065235,0.204929,9,High,0.729836
0,0.057077,0.700024,0.242899,13,Low,0.700024
5,0.189732,0.077105,0.733163,2,Medium,0.733163


In [12]:
vendor

Unnamed: 0,computer_id,memory,processing,storage,inverse_price
0,13,5,7,10,2.7
1,16,9,8,9,1.3
2,4,8,9,10,1.0
3,1,8,8,9,1.7
4,3,5,4,4,5.7
5,2,6,7,7,3.3
6,20,7,10,7,2.0
7,8,9,6,9,2.0
8,9,9,8,7,2.0
9,7,7,7,9,2.3
