# Predicting the Salary of a hired employee on the basis of test score, interview score and work experience:

In [1]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from word2number import w2n

In [2]:
def conversion_fun(alpha_num):
    if alpha_num is np.nan:
        return np.nan
    alpha_num=w2n.word_to_num(alpha_num)
    return alpha_num

### Fetching data:

In [3]:
df=pd.read_csv('hiring.csv')
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


### Cleaning data:

In [4]:
df['test_score(out of 10)']=df['test_score(out of 10)'].fillna(df['test_score(out of 10)'].median())
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


In [5]:
df['experience']=df['experience'].apply(conversion_fun)
df['experience']

0     NaN
1     NaN
2     5.0
3     2.0
4     7.0
5     3.0
6    10.0
7    11.0
Name: experience, dtype: float64

### Case 1: Taking the median of experience years for NaN values:

In [6]:
df1=df.copy()
df1

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,8.0,7,72000
7,11.0,7.0,8,80000


In [7]:
df1['experience']=df1['experience'].fillna(df['experience'].median())
df1

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,6.0,8.0,9,50000
1,6.0,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,8.0,7,72000
7,11.0,7.0,8,80000


### Training a linear regression object for predictions:

In [8]:
reg1=linear_model.LinearRegression()

In [9]:
reg1.fit(df1[['experience','test_score(out of 10)','interview_score(out of 10)']],df1['salary($)'])

LinearRegression()

### Predicting Salary of two candidates, one of them having 2 years experience, a test score of 9 and an interview score of 6 and another candidate has scored 10 in both tests with a work experience of 12 years:

In [10]:
reg1.predict([[2,9,6],[12,10,10]])



array([47056.91056911, 88227.64227642])

## Case 2: Taking zero as experience years for NaN values:

In [11]:
df2=df.copy()
df2

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,8.0,7,72000
7,11.0,7.0,8,80000


In [12]:
df2['experience']=df2['experience'].fillna(0)
df2

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0.0,8.0,9,50000
1,0.0,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,8.0,7,72000
7,11.0,7.0,8,80000


### Training the linear regression object with the following data:

In [14]:
reg2=linear_model.LinearRegression()

In [15]:
reg2.fit(df2[['experience','test_score(out of 10)','interview_score(out of 10)']],df2['salary($)'])

LinearRegression()

### Predicting Salary of two candidates, one of them having 2 years experience, a test score of 9 and an interview score of 6 and another candidate has scored 10 in both tests with a work experience of 12 years:

In [16]:
reg1.predict([[2,9,6],[12,10,10]])



array([47056.91056911, 88227.64227642])