# **Objective**

Predict whether a candidate gets placed (i.e., secures a job) or not.

# **Importing necessary libraries**

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from yellowbrick.classifier import ConfusionMatrix
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost.sklearn import XGBClassifier
import os

ModuleNotFoundError: No module named 'yellowbrick'

# **Importing the Data**

In [None]:
# Load the job-placement CSV from the Colab working directory.
df = pd.read_csv(filepath_or_buffer=r"/content/Job_Placement_Data.csv")

# Making a copy of the data to protect the original from later manipulations.

In [None]:
# Work on an independent (deep) copy so `df` keeps the values as loaded.
data = df.copy(deep=True)

# **EDA**

In [None]:
# Number of missing entries in each column.
data.isnull().sum()

**So, there are no missing values in our data.**

In [None]:
# Rows that exactly repeat an earlier row (an empty result means no duplicates).
data.loc[data.duplicated()]

**No duplicate rows exist in our data.**

In [None]:
# Summarise the frame: column names, non-null counts and dtypes.
data.info()

# **Visualizations**

In [None]:
# Row counts per gender, with bars split by status.
sns.countplot(x='gender', hue='status', data=data)

In [None]:
# Distribution of ssc_percentage for each ssc_board, separated by status.
sns.violinplot(data=data, x='ssc_percentage', y='ssc_board', hue='status')
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# Row counts per ssc_board, with bars split by status.
sns.countplot(x='ssc_board', hue='status', data=data)
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# Individual ssc_percentage values per ssc_board, coloured by status.
sns.stripplot(x='ssc_board', y='ssc_percentage', hue='status', data=data)

In [None]:
# hsc_percentage plotted against the row index, coloured by status.
sns.scatterplot(data=data, x=data.index, y='hsc_percentage', hue='status')
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# Individual hsc_percentage values per hsc_subject, coloured by status.
sns.stripplot(x='hsc_subject', y='hsc_percentage', hue='status', data=data)
# Anchor the legend further right than the other plots in this section.
plt.legend(bbox_to_anchor=(1.3, 1))

In [None]:
# Individual hsc_percentage values per hsc_board, coloured by status.
sns.stripplot(x='hsc_board', y='hsc_percentage', hue='status', data=data)

In [None]:
# Individual degree_percentage values per undergrad_degree, coloured by status.
sns.stripplot(x='undergrad_degree', y='degree_percentage', hue='status', data=data)
# Anchor the legend further right than the other plots in this section.
plt.legend(bbox_to_anchor=(1.3, 1))

In [None]:
# Row counts per undergrad_degree, with bars split by status.
sns.countplot(x='undergrad_degree', hue='status', data=data)

In [None]:
# Individual mba_percent values per specialisation, coloured by status.
sns.stripplot(x='specialisation', y='mba_percent', hue='status', data=data)

In [None]:
# Overall row counts per specialisation (no status split).
sns.countplot(data=data, x='specialisation')

In [None]:
# Row counts per specialisation, with bars split by status.
sns.countplot(x='specialisation', hue='status', data=data)

In [None]:
# Individual emp_test_percentage values per work_experience, coloured by status.
sns.stripplot(x='work_experience', y='emp_test_percentage', hue='status', data=data)

In [None]:
# Row counts per work_experience, with bars split by status.
sns.countplot(x='work_experience', hue='status', data=data)
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# ssc_percentage vs emp_test_percentage; colour encodes status and marker
# size encodes work_experience.
sns.scatterplot(
    x='ssc_percentage',
    y='emp_test_percentage',
    hue='status',
    size='work_experience',
    data=data,
)
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# hsc_percentage vs emp_test_percentage; colour encodes status and marker
# size encodes work_experience.
sns.scatterplot(
    x='hsc_percentage',
    y='emp_test_percentage',
    hue='status',
    size='work_experience',
    data=data,
)
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# degree_percentage vs emp_test_percentage; colour encodes status and marker
# size encodes work_experience.
sns.scatterplot(
    x='degree_percentage',
    y='emp_test_percentage',
    hue='status',
    size='work_experience',
    data=data,
)
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# mba_percent vs emp_test_percentage; colour encodes status and marker
# size encodes work_experience.
sns.scatterplot(
    x='mba_percent',
    y='emp_test_percentage',
    hue='status',
    size='work_experience',
    data=data,
)
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

In [None]:
# emp_test_percentage against the row index; colour encodes status and marker
# size encodes specialisation.
sns.scatterplot(
    x=data.index,
    y='emp_test_percentage',
    hue='status',
    size='specialisation',
    data=data,
)
# Anchor the legend just past the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1))

**We have 5 float variables and 8 object-type variables, so we convert the object-type columns to a numerical type.**

# **Encoding**

In [None]:
# Show the raw category counts. As a bare mid-cell expression this result was
# previously computed and then silently discarded.
print(df['gender'].value_counts())

# Encode from the untouched `df` so that re-running this cell is idempotent:
# mapping the already-encoded `data` column a second time would turn every
# value into NaN, because 0/1 are not keys of the mapping.
gender_n = {'M': 0, 'F': 1}
data['gender'] = df['gender'].map(gender_n)

In [None]:
# Show the raw category counts. As a bare mid-cell expression this result was
# previously computed and then silently discarded.
print(df['ssc_board'].value_counts())

# Encode from the untouched `df` so that re-running this cell is idempotent:
# mapping the already-encoded `data` column a second time would turn every
# value into NaN, because 0/1 are not keys of the mapping.
ssc = {'Central': 0, 'Others': 1}
data['ssc_board'] = df['ssc_board'].map(ssc)

In [None]:
# Show the raw category counts. As a bare mid-cell expression this result was
# previously computed and then silently discarded.
print(df['hsc_board'].value_counts())

# Encode from the untouched `df` so that re-running this cell is idempotent:
# mapping the already-encoded `data` column a second time would turn every
# value into NaN, because 0/1 are not keys of the mapping.
hsc = {'Central': 0, 'Others': 1}
data['hsc_board'] = df['hsc_board'].map(hsc)

In [None]:
# Category counts for the two text columns that are not label-mapped by hand.
# Only the last expression of a cell is displayed, so the first result is
# printed explicitly. Both are read from the untouched `df` so the cell still
# works when re-run after `data` has been one-hot encoded.
print(df['hsc_subject'].value_counts())
df['undergrad_degree'].value_counts()

In [None]:
# Show the raw category counts. As a bare mid-cell expression this result was
# previously computed and then silently discarded.
print(df['work_experience'].value_counts())

# Encode from the untouched `df` so that re-running this cell is idempotent:
# mapping the already-encoded `data` column a second time would turn every
# value into NaN, because 0/1 are not keys of the mapping.
work = {'Yes': 1, 'No': 0}
data['work_experience'] = df['work_experience'].map(work)

In [None]:
# Show the raw category counts. As a bare mid-cell expression this result was
# previously computed and then silently discarded.
print(df['specialisation'].value_counts())

# Encode from the untouched `df` so that re-running this cell is idempotent:
# mapping the already-encoded `data` column a second time would turn every
# value into NaN, because 0/1 are not keys of the mapping.
spec = {'Mkt&Fin': 1, 'Mkt&HR': 0}
data['specialisation'] = df['specialisation'].map(spec)

In [None]:
# Show the raw class counts of the target. As a bare mid-cell expression this
# result was previously computed and then silently discarded.
print(df['status'].value_counts())

# Encode the target from the untouched `df` so that re-running this cell is
# idempotent: mapping the already-encoded `data` column a second time would
# turn every value into NaN, because 0/1 are not keys of the mapping.
status = {'Placed': 1, 'Not Placed': 0}
data['status'] = df['status'].map(status)

In [None]:
# Display the frame after the manual label mappings applied in the cells above.
data

In [None]:
# One-hot encode the columns that are still non-numeric (presumably
# hsc_subject and undergrad_degree, the only text columns not mapped by hand),
# dropping the first level of each.
# dtype=int keeps the dummy columns as 0/1 integers: pandas >= 2.0 would
# otherwise produce bool columns, which are inconsistent with the hand-encoded
# 0/1 columns and are left out of the default `describe()` summary.
data = pd.get_dummies(data, drop_first=True, dtype=int)
data

In [None]:
# Summary statistics of the encoded frame.
data.describe()

In [None]:
# Pairwise correlation matrix of the encoded columns.
data.corr()

In [None]:
# Visualise the pairwise correlations of the encoded columns as a heatmap.
corr_matrix = data.corr()
sns.heatmap(corr_matrix, cmap='viridis')

# **ML Algorithms**

In [None]:
# Every column except the target, kept in the DataFrame's own column order.
# The previous set difference produced an unordered `set`: pandas >= 2.0
# rejects a set as an indexer (TypeError), and even where it was accepted the
# column order could change from one kernel session to the next.
features = [col for col in data.columns if col != 'status']

# Hold out 20% of the rows for evaluation. random_state pins the shuffle so
# the metrics reported below are reproducible under Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    data[features],
    data['status'],
    test_size=0.2,
    random_state=0,
)


**1. Logistic Regression**

In [None]:
# Fit a logistic-regression baseline on the training split and predict the
# held-out rows.
log_reg = LogisticRegression(random_state=0)
log_reg.fit(x_train, y_train)
pred_log = log_reg.predict(x_test)

# Accuracy on the held-out split.
acc_log = accuracy_score(y_test, pred_log)
print(acc_log)

# Yellowbrick confusion-matrix visualizer for the same estimator, scored on
# the test split.
cm = ConfusionMatrix(log_reg)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)

# Print the raw confusion matrix. It used to be a bare expression in the
# middle of the cell, so it was computed but never shown.
conf_log = confusion_matrix(y_test, pred_log)
print(conf_log)

# Per-class precision / recall / F1 report.
logistic_normal = classification_report(y_test, pred_log)
print(logistic_normal)

**2. Support Vector Machines**

In [None]:
# Fit a degree-4 polynomial-kernel SVM on the training split and predict the
# held-out rows.
svm = SVC(kernel='poly', degree=4)
svm.fit(x_train, y_train)
pred_svm = svm.predict(x_test)
acc_svm = accuracy_score(y_test, pred_svm)

# Yellowbrick confusion-matrix visualizer for the same estimator, scored on
# the test split.
cm = ConfusionMatrix(svm)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)

# Keep the report under its own name: assigning it to `svm` replaced the
# fitted estimator with a plain string, making the model unusable afterwards.
report_svm = classification_report(y_test, pred_svm)
print(report_svm)
print(acc_svm)

# Raw confusion matrix, displayed as the cell output.
conf_svm = confusion_matrix(y_test, pred_svm)
conf_svm

**3. Gaussian Naive Bayes**

In [None]:
# Fit a Gaussian naive-Bayes classifier on the training split and predict the
# held-out rows.
gaussianNB = GaussianNB()
gaussianNB.fit(x_train, y_train)
pred_gaussianNB = gaussianNB.predict(x_test)
acc_gaussianNB = accuracy_score(y_test, pred_gaussianNB)

# Yellowbrick confusion-matrix visualizer for the same estimator, scored on
# the test split.
cm = ConfusionMatrix(gaussianNB)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)

# Keep the report under its own name: assigning it to `gaussianNB` replaced
# the fitted estimator with a plain string, making the model unusable afterwards.
report_gaussianNB = classification_report(y_test, pred_gaussianNB)
print(report_gaussianNB)
print(acc_gaussianNB)

# Raw confusion matrix, displayed as the cell output.
conf_gaussianNB = confusion_matrix(y_test, pred_gaussianNB)
conf_gaussianNB

**4. K-nearest Neighbor**

In [None]:
# Fit a 7-nearest-neighbours classifier on the training split and predict the
# held-out rows.
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(x_train, y_train)
pred_knn = knn.predict(x_test)
acc_knn = accuracy_score(y_test, pred_knn)

# Yellowbrick confusion-matrix visualizer for the same estimator, scored on
# the test split.
cm = ConfusionMatrix(knn)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)

# Keep the report under its own name: assigning it to `knn` replaced the
# fitted estimator with a plain string, making the model unusable afterwards.
report_knn = classification_report(y_test, pred_knn)
print(report_knn)
print(acc_knn)

# Raw confusion matrix, displayed as the cell output.
conf_knn = confusion_matrix(y_test, pred_knn)
conf_knn

**5. Decision Tree**

In [None]:
# Fit an entropy-criterion decision tree on the training split and predict the
# held-out rows. random_state pins the tree's internal randomness so the
# reported metrics do not change between runs.
decision_tree = DecisionTreeClassifier(criterion='entropy', random_state=0)
decision_tree.fit(x_train, y_train)
pred_tree = decision_tree.predict(x_test)
acc_tree = accuracy_score(y_test, pred_tree)

# Yellowbrick confusion-matrix visualizer for the same estimator, scored on
# the test split.
cm = ConfusionMatrix(decision_tree)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)

# Keep the report under its own name: assigning it to `decision_tree` replaced
# the fitted estimator with a plain string, making the model unusable afterwards.
report_tree = classification_report(y_test, pred_tree)
print(report_tree)
print(acc_tree)

# Raw confusion matrix, displayed as the cell output.
conf_tree = confusion_matrix(y_test, pred_tree)
conf_tree

**6. Random Forest**

In [None]:
# Fit a random forest on the training split and predict the held-out rows.
# random_state pins the bootstrap / feature sampling so the reported metrics
# do not change between runs.
rand_forest = RandomForestClassifier(random_state=0)
rand_forest.fit(x_train, y_train)
pred_forest = rand_forest.predict(x_test)
acc_forest = accuracy_score(y_test, pred_forest)

# Yellowbrick confusion-matrix visualizer for the same estimator, scored on
# the test split.
cm = ConfusionMatrix(rand_forest)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)

# Keep the report under its own name: assigning it to `rand_forest` replaced
# the fitted estimator with a plain string, making the model unusable afterwards.
report_forest = classification_report(y_test, pred_forest)
print(report_forest)
print(acc_forest)

# Raw confusion matrix, displayed as the cell output.
conf_forest = confusion_matrix(y_test, pred_forest)
conf_forest

**7. XGB Classifier**

In [None]:
# Fit a 200-estimator XGBoost classifier on the training split and predict the
# held-out rows.
xgb = XGBClassifier(n_estimators=200)
xgb.fit(x_train, y_train)
pred_xgb = xgb.predict(x_test)
acc_xgb = accuracy_score(y_test, pred_xgb)

# Yellowbrick confusion-matrix visualizer for the same estimator, scored on
# the test split.
cm = ConfusionMatrix(xgb)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)

# Keep the report under its own name: assigning it to `xgb` replaced the
# fitted estimator with a plain string, making the model unusable afterwards.
report_xgb = classification_report(y_test, pred_xgb)
print(report_xgb)
print(acc_xgb)

# Raw confusion matrix, displayed as the cell output.
conf_xgb = confusion_matrix(y_test, pred_xgb)
conf_xgb