### All imports in one place

In [36]:
import pickle
import warnings
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

### Ignoring warnings

In [37]:
warnings.filterwarnings('ignore')

### Loading dataset and labels

In [38]:
# Read the raw CSV into the feature frame.
dataset = pd.read_csv('./corpora/dataset.csv')

# Detach the target column: `pop` returns the 'Role' Series and removes it from
# `dataset` in a single step, so only feature columns remain in `dataset`.
role = dataset.pop('Role')

# Keep the labels as a one-column DataFrame named 'Role'.
data = dict(Role=role)
labels = pd.DataFrame(data)

### Pre-processing 1

In [39]:
# Text answer -> integer code. Applied one entry at a time, in this order,
# across the whole frame.
answer_codes = {
    'yes': 1,
    'no': 0,
    'excellent': 2,
    'medium': 1,
    'poor': 0,
}
for answer_text, answer_code in answer_codes.items():
    dataset = dataset.replace([answer_text], answer_code)

# Columns whose missing values are filled with 0. The names are the exact CSV
# headers (including their original spelling and spacing).
columns_to_fill = [
    'CGPA',
    'Did you do webdev during college time ?',
    'Are you good at Data analysis ?',
    'reading and writing skills',
    'Are you a tech person ?',
    'Were you in a non tech society ?',
    'Are you good at coding ?',
    'Have you developed mobile apps ?',
    'Are you good at communication ?',
    'Do you have specialization in security',
    'Have you ever handled large databases ?',
    'Do you have knowlege of statistics and data science?',
    'Are you proficient in English ?',
    'Have you ever managed some event?',
    'Do you write technical blogs ?',
    'Are you into marketing ?',
    'Are you a ML expert ?',
    'Do you have a lot of connections ?',
    'Have you ever built live project ?',
]
for column_name in columns_to_fill:
    dataset[column_name] = dataset[column_name].fillna(0)

### Pre-processing 2

In [40]:
# Only the CGPA column is rescaled; every other column is left as it is.
columns_to_scale = ['CGPA']

# Fit the scaler on the selected column(s), then write the scaled values back
# into the same column(s) of the feature frame.
standardScaler = StandardScaler()
standardScaler.fit(dataset[columns_to_scale])
dataset[columns_to_scale] = standardScaler.transform(dataset[columns_to_scale])

### Creating the training datasets

In [41]:
# No hold-out split is made here: the whole feature frame and the whole label
# frame are used as the training inputs.
x_train = dataset
y_train = labels

### K-Nearest Neighbors Classifier with K = 20

In [42]:
# Fit a k-nearest-neighbours classifier (k = 20) on the full training set.
knn_classifier = KNeighborsClassifier(n_neighbors=20)
# Flatten the label frame to 1-D, the shape scikit-learn expects for a single
# target, instead of relying on its implicit column-vector conversion.
knn_classifier.fit(x_train, np.ravel(y_train))
# Persist the fitted model; the context manager guarantees the file handle is
# flushed and closed even if pickling fails.
with open('models/knn_classifier.sav', 'wb') as model_file:
    pickle.dump(knn_classifier, model_file)

### Decision Tree Classifier with as many features as columns

In [43]:
# Fit a decision tree that may consider every feature column at each split
# (max_features equals the number of columns); random_state is fixed at 0.
dt_classifier = DecisionTreeClassifier(max_features=len(dataset.columns), random_state=0)
# Flatten the label frame to 1-D, the shape scikit-learn expects for a single
# target.
dt_classifier.fit(x_train, np.ravel(y_train))
# Persist the fitted model; the context manager guarantees the file handle is
# flushed and closed even if pickling fails.
with open('models/dt_classifier.sav', 'wb') as model_file:
    pickle.dump(dt_classifier, model_file)

### Support Vector Classifier with a linear kernel

In [44]:
# Fit a support vector classifier with a linear kernel on the full training set.
svc_classifier = SVC(kernel='linear')
# Flatten the label frame to 1-D, the shape scikit-learn expects for a single
# target, instead of relying on its implicit column-vector conversion.
svc_classifier.fit(x_train, np.ravel(y_train))
# Persist the fitted model; the context manager guarantees the file handle is
# flushed and closed even if pickling fails.
with open('models/svc_classifier.sav', 'wb') as model_file:
    pickle.dump(svc_classifier, model_file)

### Logistic Regression classifier

In [45]:
# Fit a logistic regression classifier with the library's default settings.
lg_classifier = LogisticRegression()
# Flatten the label frame to 1-D, the shape scikit-learn expects for a single
# target, instead of relying on its implicit column-vector conversion.
lg_classifier.fit(x_train, np.ravel(y_train))
# Persist the fitted model; the context manager guarantees the file handle is
# flushed and closed even if pickling fails.
with open('models/lg_classifier.sav', 'wb') as model_file:
    pickle.dump(lg_classifier, model_file)

### Random Forest Classifier with 1000 estimators

In [46]:
# Fit a random forest of 1000 trees; random_state is fixed at 0.
rf_classifier = RandomForestClassifier(n_estimators=1000, random_state=0)
# Flatten the label frame to 1-D, the shape scikit-learn expects for a single
# target, instead of relying on its implicit column-vector conversion.
rf_classifier.fit(x_train, np.ravel(y_train))
# Persist the fitted model; the context manager guarantees the file handle is
# flushed and closed even if pickling fails.
with open('models/rf_classifier.sav', 'wb') as model_file:
    pickle.dump(rf_classifier, model_file)