**Importing the Dependencies**

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

Data Collection and Data Processing

In [2]:
#Loading the dataset into pandas dataframe
hiring_dataset = pd.read_csv("/content/Hiring Challenge.csv")

In [3]:
hiring_dataset.head()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,Hired
0,b,30.83,0.0,u,g,w,v,1.25,t,t,1,f,g,202,0,1
1,a,58.67,4.46,u,g,q,h,3.04,t,t,6,f,g,43,560,1
2,a,24.5,0.5,u,g,q,h,1.5,t,f,0,f,g,280,824,1
3,b,27.83,1.54,u,g,w,v,3.75,t,t,5,t,g,100,3,1
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120,0,1


In [4]:
#Statistical measure of the dataset
hiring_dataset.describe()

Unnamed: 0,C3,C8,C11,C15,Hired
count,690.0,690.0,690.0,690.0,690.0
mean,4.758725,2.223406,2.4,1017.385507,0.444928
std,4.978163,3.346513,4.86294,5210.102598,0.497318
min,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.165,0.0,0.0,0.0
50%,2.75,1.0,0.0,5.0,0.0
75%,7.2075,2.625,3.0,395.5,1.0
max,28.0,28.5,67.0,100000.0,1.0


In [5]:
#Statistical measure of the dataset
hiring_dataset.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
C3,690.0,4.758725,4.978163,0.0,1.0,2.75,7.2075,28.0
C8,690.0,2.223406,3.346513,0.0,0.165,1.0,2.625,28.5
C11,690.0,2.4,4.86294,0.0,0.0,0.0,3.0,67.0
C15,690.0,1017.385507,5210.102598,0.0,0.0,5.0,395.5,100000.0
Hired,690.0,0.444928,0.497318,0.0,0.0,0.0,1.0,1.0


In [6]:
hiring_dataset.isnull().sum()

C1       0
C2       0
C3       0
C4       0
C5       0
C6       0
C7       0
C8       0
C9       0
C10      0
C11      0
C12      0
C13      0
C14      0
C15      0
Hired    0
dtype: int64

In [7]:
hiring_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 690 entries, 0 to 689
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   C1      690 non-null    object 
 1   C2      690 non-null    object 
 2   C3      690 non-null    float64
 3   C4      690 non-null    object 
 4   C5      690 non-null    object 
 5   C6      690 non-null    object 
 6   C7      690 non-null    object 
 7   C8      690 non-null    float64
 8   C9      690 non-null    object 
 9   C10     690 non-null    object 
 10  C11     690 non-null    int64  
 11  C12     690 non-null    object 
 12  C13     690 non-null    object 
 13  C14     690 non-null    object 
 14  C15     690 non-null    int64  
 15  Hired   690 non-null    int64  
dtypes: float64(2), int64(3), object(11)
memory usage: 86.4+ KB


In [8]:
for column in hiring_dataset.columns:
    print(hiring_dataset[column].value_counts())

C1
b    468
a    210
?     12
Name: count, dtype: int64
C2
?        12
22.67     9
20.42     7
18.83     6
24.5      6
         ..
48.25     1
28.33     1
18.75     1
18.5      1
36.42     1
Name: count, Length: 350, dtype: int64
C3
1.500     21
0.000     19
3.000     19
2.500     19
0.750     16
          ..
0.085      1
12.250     1
11.045     1
11.125     1
3.375      1
Name: count, Length: 215, dtype: int64
C4
u    519
y    163
?      6
l      2
Name: count, dtype: int64
C5
g     519
p     163
?       6
gg      2
Name: count, dtype: int64
C6
c     137
q      78
w      64
i      59
aa     54
ff     53
k      51
cc     41
m      38
x      38
d      30
e      25
j      10
?       9
r       3
Name: count, dtype: int64
C7
v     399
h     138
bb     59
ff     57
?       9
j       8
z       8
dd      6
n       4
o       2
Name: count, dtype: int64
C8
0.000    70
0.250    35
0.040    33
1.000    31
0.125    30
         ..
4.165     1
9.000     1
1.960     1
5.125     1
8.290     1
Name: co

In [9]:
hiring_dataset.replace('?', np.NaN, inplace=True)

In [10]:
#Number of Missing Values
missing_values = hiring_dataset.isna().sum()
print("Total Numbers of missing values in our dataset:\n", missing_values)

Total Numbers of missing values in our dataset:
 C1       12
C2       12
C3        0
C4        6
C5        6
C6        9
C7        9
C8        0
C9        0
C10       0
C11       0
C12       0
C13       0
C14      13
C15       0
Hired     0
dtype: int64


**Insight**
> Our dataset contains both numeric and non-numeric data (specifically data that are of float64, int64 and object types)

> The dataset also contains values from several ranges. Some features have a value range of 0 - 28, some have a range of 2 - 67, and some have a range of 1017 - 100000. Apart from these, we can get useful statistical information (like mean, max, and min) about the features that have numerical values.

> Finally, the dataset has missing values. The missing values in the dataset are labeled with '?', which can be seen in the cell's output. We replaced all the question marks with NaNs.

Handling Missing Values (Numeric & Categorical)

Mean Imputation --> Numeric Column

Mode Imputation --> Categorical Column

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Define which columns are numeric and which are categorical
numeric_features = hiring_dataset.select_dtypes(include=['number']).columns
categorical_features = hiring_dataset.select_dtypes(include=['object']).columns

# Create mean imputer for numeric columns, but keep existing numeric values unchanged
numeric_imputer = ColumnTransformer(
    transformers=[('numeric', SimpleImputer(strategy='mean'), numeric_features)],
    remainder='passthrough'
)

# Create mode imputer for categorical columns
categorical_imputer = SimpleImputer(strategy='most_frequent')

# Apply imputation strategies to the respective columns
hiring_dataset_filled = hiring_dataset.copy()
hiring_dataset_filled[numeric_features] = numeric_imputer.fit_transform(hiring_dataset[numeric_features])
hiring_dataset_filled[categorical_features] = categorical_imputer.fit_transform(hiring_dataset[categorical_features])

print(hiring_dataset_filled)


    C1     C2      C3 C4 C5  C6  C7    C8 C9 C10  C11 C12 C13  C14    C15  \
0    b  30.83   0.000  u  g   w   v  1.25  t   t  1.0   f   g  202    0.0   
1    a  58.67   4.460  u  g   q   h  3.04  t   t  6.0   f   g   43  560.0   
2    a   24.5   0.500  u  g   q   h  1.50  t   f  0.0   f   g  280  824.0   
3    b  27.83   1.540  u  g   w   v  3.75  t   t  5.0   t   g  100    3.0   
4    b  20.17   5.625  u  g   w   v  1.71  t   f  0.0   f   s  120    0.0   
..  ..    ...     ... .. ..  ..  ..   ... ..  ..  ...  ..  ..  ...    ...   
685  b  21.08  10.085  y  p   e   h  1.25  f   f  0.0   f   g  260    0.0   
686  a  22.67   0.750  u  g   c   v  2.00  f   t  2.0   t   g  200  394.0   
687  a  25.25  13.500  y  p  ff  ff  2.00  f   t  1.0   t   g  200    1.0   
688  b  17.92   0.205  u  g  aa   v  0.04  f   f  0.0   f   g  280  750.0   
689  b     35   3.375  u  g   c   h  8.29  f   f  0.0   t   g    0    0.0   

     Hired  
0      1.0  
1      1.0  
2      1.0  
3      1.0  
4      1.0

In [None]:
hiring_dataset.tail()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,Hired
685,b,21.08,10.085,y,p,e,h,1.25,f,f,0,f,g,260,0,0
686,a,22.67,0.75,u,g,c,v,2.0,f,t,2,t,g,200,394,0
687,a,25.25,13.5,y,p,ff,ff,2.0,f,t,1,t,g,200,1,0
688,b,17.92,0.205,u,g,aa,v,0.04,f,f,0,f,g,280,750,0
689,b,35.0,3.375,u,g,c,h,8.29,f,f,0,t,g,0,0,0


In [30]:
#As we did mean imputation and mode imputation above, we're checking again for missing values for confirmation/understanding
#Number of Missing Values

# Execute the first Python command
missing_values_filled = hiring_dataset_filled.isna().sum()

# Execute the second Python command
missing_values = hiring_dataset.isna().sum()

# Concatenate the outputs horizontally
concatenated_output = pd.concat([missing_values_filled, missing_values], axis=1)
concatenated_output.columns = ['hiring_dataset_filled', 'hiring_dataset']

# Display the concatenated output
print("Number of Missing Values: \n\n", concatenated_output)

Number of Missing Values: 

        hiring_dataset_filled  hiring_dataset
C1                         0              12
C2                         0              12
C3                         0               0
C4                         0               6
C5                         0               6
C6                         0               9
C7                         0               9
C8                         0               0
C9                         0               0
C10                        0               0
C11                        0               0
C12                        0               0
C13                        0               0
C14                        0              13
C15                        0               0
Hired                      0               0


In [34]:
import copy
# Deep copy
hiring_data = copy.deepcopy(hiring_dataset_filled)
hiring_data.head()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,Hired
0,b,30.83,0.0,u,g,w,v,1.25,t,t,1.0,f,g,202,0.0,1.0
1,a,58.67,4.46,u,g,q,h,3.04,t,t,6.0,f,g,43,560.0,1.0
2,a,24.5,0.5,u,g,q,h,1.5,t,f,0.0,f,g,280,824.0,1.0
3,b,27.83,1.54,u,g,w,v,3.75,t,t,5.0,t,g,100,3.0,1.0
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0.0,f,s,120,0.0,1.0


In [35]:
hiring_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 690 entries, 0 to 689
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   C1      690 non-null    object 
 1   C2      690 non-null    object 
 2   C3      690 non-null    float64
 3   C4      690 non-null    object 
 4   C5      690 non-null    object 
 5   C6      690 non-null    object 
 6   C7      690 non-null    object 
 7   C8      690 non-null    float64
 8   C9      690 non-null    object 
 9   C10     690 non-null    object 
 10  C11     690 non-null    float64
 11  C12     690 non-null    object 
 12  C13     690 non-null    object 
 13  C14     690 non-null    object 
 14  C15     690 non-null    float64
 15  Hired   690 non-null    float64
dtypes: float64(5), object(11)
memory usage: 86.4+ KB


C2 and C14 showing object datatype and integer data showing with head() and tail() function, need to check all the elements available for both column

In [50]:
# Check the data type of the element at index 0 in the 'C2' & C14column
element_data_type1 = type(hiring_data.at[0, 'C2'])

# Print the data type
print("Data type of the element:", element_data_type1)
print(hiring_data.at[0, 'C2'])

element_data_type2 = type(hiring_data.at[0, 'C14'])

# Print the data type
print("\nData type of the element:", element_data_type2)
print(hiring_data.at[0, 'C14'])

Data type of the element: <class 'str'>
30.83

Data type of the element: <class 'str'>
202


We can see that type of elements in C2 and c14 Column is string hence it is showing object data type, we need to convert it to float data type

In [51]:
# Converting data type from string to float
hiring_data['C2'] = hiring_data['C2'].astype(float)
hiring_data['C14'] = hiring_data['C14'].astype(float)

In [52]:
hiring_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 690 entries, 0 to 689
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   C1      690 non-null    object 
 1   C2      690 non-null    float64
 2   C3      690 non-null    float64
 3   C4      690 non-null    object 
 4   C5      690 non-null    object 
 5   C6      690 non-null    object 
 6   C7      690 non-null    object 
 7   C8      690 non-null    float64
 8   C9      690 non-null    object 
 9   C10     690 non-null    object 
 10  C11     690 non-null    float64
 11  C12     690 non-null    object 
 12  C13     690 non-null    object 
 13  C14     690 non-null    float64
 14  C15     690 non-null    float64
 15  Hired   690 non-null    float64
dtypes: float64(7), object(9)
memory usage: 86.4+ KB


In [53]:
# Instantiate LabelEncoder
le = LabelEncoder()

# List to store the categorical column names
categorical_columns = []

# Iterate over columns and identify categorical columns
for col in hiring_data.columns:
    if hiring_data[col].dtype == 'object':
        categorical_columns.append(col)

# Encode categorical columns
for col in categorical_columns:
    hiring_data[col] = le.fit_transform(hiring_data[col])

# Display the DataFrame after encoding
print(hiring_data)

     C1     C2      C3  C4  C5  C6  C7    C8  C9  C10  C11  C12  C13    C14  \
0     1  30.83   0.000   1   0  12   7  1.25   1    1  1.0    0    0  202.0   
1     0  58.67   4.460   1   0  10   3  3.04   1    1  6.0    0    0   43.0   
2     0  24.50   0.500   1   0  10   3  1.50   1    0  0.0    0    0  280.0   
3     1  27.83   1.540   1   0  12   7  3.75   1    1  5.0    1    0  100.0   
4     1  20.17   5.625   1   0  12   7  1.71   1    0  0.0    0    2  120.0   
..   ..    ...     ...  ..  ..  ..  ..   ...  ..  ...  ...  ...  ...    ...   
685   1  21.08  10.085   2   2   4   3  1.25   0    0  0.0    0    0  260.0   
686   0  22.67   0.750   1   0   1   7  2.00   0    1  2.0    1    0  200.0   
687   0  25.25  13.500   2   2   5   2  2.00   0    1  1.0    1    0  200.0   
688   1  17.92   0.205   1   0   0   7  0.04   0    0  0.0    0    0  280.0   
689   1  35.00   3.375   1   0   1   3  8.29   0    0  0.0    1    0    0.0   

       C15  Hired  
0      0.0    1.0  
1    560.0 

In [54]:
hiring_data.head()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,Hired
0,1,30.83,0.0,1,0,12,7,1.25,1,1,1.0,0,0,202.0,0.0,1.0
1,0,58.67,4.46,1,0,10,3,3.04,1,1,6.0,0,0,43.0,560.0,1.0
2,0,24.5,0.5,1,0,10,3,1.5,1,0,0.0,0,0,280.0,824.0,1.0
3,1,27.83,1.54,1,0,12,7,3.75,1,1,5.0,1,0,100.0,3.0,1.0
4,1,20.17,5.625,1,0,12,7,1.71,1,0,0.0,0,2,120.0,0.0,1.0


In [55]:
#Statistical measure of the CLeaned & LablelEncoded dataset
hiring_data.describe()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,Hired
count,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0,690.0
mean,0.695652,31.41342,4.758725,1.233333,0.475362,5.698551,5.098551,2.223406,0.523188,0.427536,2.4,0.457971,0.176812,180.547826,1017.385507,0.444928
std,0.460464,11.910293,4.978163,0.430063,0.850238,4.285748,2.510731,3.346513,0.499824,0.49508,4.86294,0.498592,0.557869,173.970323,5210.102598,0.497318
min,0.0,13.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,22.67,1.0,1.0,0.0,1.0,3.0,0.165,0.0,0.0,0.0,0.0,0.0,60.0,0.0,0.0
50%,1.0,28.17,2.75,1.0,0.0,5.0,7.0,1.0,1.0,0.0,0.0,0.0,0.0,160.0,5.0,0.0
75%,1.0,37.7075,7.2075,1.0,0.0,10.0,7.0,2.625,1.0,1.0,3.0,1.0,0.0,272.0,395.5,1.0
max,1.0,80.25,28.0,2.0,2.0,13.0,8.0,28.5,1.0,1.0,67.0,1.0,2.0,2000.0,100000.0,1.0


In [56]:
hiring_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 690 entries, 0 to 689
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   C1      690 non-null    int64  
 1   C2      690 non-null    float64
 2   C3      690 non-null    float64
 3   C4      690 non-null    int64  
 4   C5      690 non-null    int64  
 5   C6      690 non-null    int64  
 6   C7      690 non-null    int64  
 7   C8      690 non-null    float64
 8   C9      690 non-null    int64  
 9   C10     690 non-null    int64  
 10  C11     690 non-null    float64
 11  C12     690 non-null    int64  
 12  C13     690 non-null    int64  
 13  C14     690 non-null    float64
 14  C15     690 non-null    float64
 15  Hired   690 non-null    float64
dtypes: float64(7), int64(9)
memory usage: 86.4 KB


In [57]:
hiring_data['Hired'].value_counts()

Hired
0.0    383
1.0    307
Name: count, dtype: int64

0 --> Not Hired

1 --> Hired

In [62]:
hiring_data.groupby("Hired").mean()

Unnamed: 0_level_0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15
Hired,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0.0,0.707572,29.621854,3.839948,1.308094,0.616188,5.198433,5.096606,1.257924,0.201044,0.224543,0.631854,0.443864,0.227154,196.049608,198.605744
1.0,0.680782,33.648502,5.904951,1.140065,0.299674,6.322476,5.100977,3.427899,0.925081,0.680782,4.605863,0.47557,0.114007,161.208469,2038.859935


**Separating data and Labels**

In [63]:
#Separating data and Labels
X = hiring_data.drop(columns="Hired", axis=1)
Y = hiring_data['Hired']

In [64]:
print(X)
print(Y)

     C1     C2      C3  C4  C5  C6  C7    C8  C9  C10  C11  C12  C13    C14  \
0     1  30.83   0.000   1   0  12   7  1.25   1    1  1.0    0    0  202.0   
1     0  58.67   4.460   1   0  10   3  3.04   1    1  6.0    0    0   43.0   
2     0  24.50   0.500   1   0  10   3  1.50   1    0  0.0    0    0  280.0   
3     1  27.83   1.540   1   0  12   7  3.75   1    1  5.0    1    0  100.0   
4     1  20.17   5.625   1   0  12   7  1.71   1    0  0.0    0    2  120.0   
..   ..    ...     ...  ..  ..  ..  ..   ...  ..  ...  ...  ...  ...    ...   
685   1  21.08  10.085   2   2   4   3  1.25   0    0  0.0    0    0  260.0   
686   0  22.67   0.750   1   0   1   7  2.00   0    1  2.0    1    0  200.0   
687   0  25.25  13.500   2   2   5   2  2.00   0    1  1.0    1    0  200.0   
688   1  17.92   0.205   1   0   0   7  0.04   0    0  0.0    0    0  280.0   
689   1  35.00   3.375   1   0   1   3  8.29   0    0  0.0    1    0    0.0   

       C15  
0      0.0  
1    560.0  
2    824.0  

Training and Test Data

In [65]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=1)

In [66]:
print(X.shape, X_train.shape, X_test.shape)

(690, 15) (552, 15) (138, 15)


In [67]:
print(X_train)
print(Y_train)

     C1     C2      C3  C4  C5  C6  C7     C8  C9  C10  C11  C12  C13    C14  \
229   1  22.08  11.000   1   0   2   7  0.665   1    0  0.0    0    0  100.0   
367   1  39.42   1.710   2   2   9   7  0.165   0    0  0.0    0    2  400.0   
0     1  30.83   0.000   1   0  12   7  1.250   1    1  1.0    0    0  202.0   
494   0  18.83   4.415   2   2   1   3  3.000   1    0  0.0    0    0  240.0   
619   1  18.42  10.415   2   2   0   7  0.125   1    0  0.0    0    0  120.0   
..   ..    ...     ...  ..  ..  ..  ..    ...  ..  ...  ...  ...  ...    ...   
428   1  49.17   2.290   1   0   5   2  0.290   0    0  0.0    0    0  200.0   
485   1  74.83  19.000   2   2   5   2  0.040   0    1  2.0    0    0    0.0   
63    0  20.42   0.835   1   0  10   7  1.585   1    1  1.0    0    0    0.0   
687   0  25.25  13.500   2   2   5   2  2.000   0    1  1.0    1    0  200.0   
31    1  42.00   9.790   1   0  13   3  7.960   1    1  8.0    0    0    0.0   

       C15  
229    0.0  
367    0.0  


Model Training --> LogisticRegression

In [68]:
model = LogisticRegression()

In [69]:
#Train the Logistci Regression model with training data
model.fit(X_train, Y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model Evaluation

In [70]:
#accuracy on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

In [71]:
print("Accuracy on training data: ", training_data_accuracy)

Accuracy on training data:  0.8188405797101449


In [72]:
#accuracy score on test data
X_test_prediction = model.predict(X_test)
testing_data_accuracy = accuracy_score(X_test_prediction, Y_test)

In [73]:
print("Accuracy on testing data: ", testing_data_accuracy)

Accuracy on testing data:  0.8043478260869565


Making Predictive System

In [77]:
csv_data = hiring_data.to_csv(index=False)
print(csv_data)

C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,Hired
1,30.83,0.0,1,0,12,7,1.25,1,1,1.0,0,0,202.0,0.0,1.0
0,58.67,4.46,1,0,10,3,3.04,1,1,6.0,0,0,43.0,560.0,1.0
0,24.5,0.5,1,0,10,3,1.5,1,0,0.0,0,0,280.0,824.0,1.0
1,27.83,1.54,1,0,12,7,3.75,1,1,5.0,1,0,100.0,3.0,1.0
1,20.17,5.625,1,0,12,7,1.71,1,0,0.0,0,2,120.0,0.0,1.0
1,32.08,4.0,1,0,9,7,2.5,1,0,0.0,1,0,360.0,0.0,1.0
1,33.17,1.04,1,0,11,3,6.5,1,0,0.0,1,0,164.0,31285.0,1.0
0,22.92,11.585,1,0,2,7,0.04,1,0,0.0,0,0,80.0,1349.0,1.0
1,54.42,0.5,2,2,8,3,3.96,1,0,0.0,0,0,180.0,314.0,1.0
1,42.5,4.915,2,2,12,7,3.165,1,0,0.0,1,0,52.0,1442.0,1.0
1,22.08,0.83,1,0,1,3,2.165,0,0,0.0,1,0,128.0,0.0,1.0
1,29.92,1.835,1,0,1,3,4.335,1,0,0.0,0,0,260.0,200.0,1.0
0,38.25,6.0,1,0,8,7,1.0,1,0,0.0,1,0,0.0,0.0,1.0
1,48.08,6.04,1,0,8,7,0.04,0,0,0.0,0,0,0.0,2690.0,1.0
0,45.83,10.5,1,0,10,7,5.0,1,1,7.0,1,0,0.0,0.0,1.0
1,36.67,4.415,2,2,8,7,0.25,1,1,10.0,1,0,320.0,0.0,1.0
1,28.25,0.875,1,0,9,7,0.96,1,1,3.0,1,0,396.0,0.0,1.0
0,23.25,5.875,1,0,10,7,3.17,1,1,10.0,0,0

In [86]:
input_data = (1,36.17,0.42,2,2,12,7,0.29,0,0,0.0,1,0,309.0,2.0)

#changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

#reshape the np array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

prediction = model.predict(input_data_reshaped)
print(prediction)

if prediction == 1:
  print("Hired")
else:
  print("Not Hired")

[0.]
Not Hired


