In [1]:
!pip install word2number



In [2]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from word2number import w2n

In [3]:
# Load the raw hiring data; the path is relative to the notebook's working directory.
df = pd.read_csv("hiring.csv")
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [4]:
# Inspect the raw experience column (spelled-out numbers, with missing entries).
df['experience']

0       NaN
1       NaN
2      five
3       two
4     seven
5     three
6       ten
7    eleven
Name: experience, dtype: object

In [5]:
# Treat a missing experience entry as the word 'zero' so that every value in
# the column is a number word.
df['experience'] = df['experience'].fillna(value='zero')
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [6]:
# Convert spelled-out numbers ('five', 'zero', ...) to integers.
# Only string values are handed to w2n.word_to_num; anything that is already
# numeric is passed through unchanged, so re-running this cell on its own
# output does not fail.
df['experience'] = df['experience'].apply(
    lambda value: w2n.word_to_num(value) if isinstance(value, str) else value
)
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,,7,72000
7,11,7.0,8,80000


## Data Preprocessing: Fill NA values with the floored mean value of a column.

In [7]:
# Work on an independent copy so the mean-imputation experiment leaves `df` untouched.
df1 = df.copy(deep=True)

In [8]:
# Mean of the test scores (pandas skips the missing value by default).
df1.loc[:, 'test_score(out of 10)'].mean()

7.857142857142857

In [9]:
# Floor the column mean so the imputed score is a whole number.
mean_test_score = np.floor(
    df1['test_score(out of 10)'].mean()
)
mean_test_score

7.0

In [31]:
# Replace the missing test score with the floored mean held in mean_test_score.
df1['test_score(out of 10)'] = df1['test_score(out of 10)'].fillna(value=mean_test_score)
df1

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,7.0,7,72000
7,11,7.0,8,80000


In [12]:
# Fit salary as a linear function of experience and the two scores,
# using the mean-imputed frame.
reg = linear_model.LinearRegression()
reg.fit(
    X=df1[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']],
    y=df1['salary($)'],
)

LinearRegression()

In [13]:
# Fitted coefficients, one per feature column in the order passed to fit():
# experience, test score, interview score.
reg.coef_

array([2922.26901502, 2221.30909959, 2147.48256637])

In [14]:
# Fitted intercept (the constant term of the linear model).
reg.intercept_

14992.65144669314

### Find the salary of a person who has 2 yrs of experience, a test score of 7 and an interview score of 6.

In [15]:
# The model was fitted on a DataFrame, so pass the query as a DataFrame with
# the same column names; a bare nested list makes newer scikit-learn versions
# warn that X has no valid feature names.
reg.predict(pd.DataFrame(
    [[2, 7, 6]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))

array([49271.24857206])

In [16]:
# Manual check of the prediction: dot(coefficients, features) + intercept.
# The values are read from the fitted model instead of being pasted in as
# literals, so the check always refers to the model currently bound to `reg`.
# Rounded to 2 decimals to keep the displayed value stable.
round(float(np.dot(reg.coef_, [2, 7, 6]) + reg.intercept_), 2)

49271.25

### Find the salary of a person who has 12 yrs of experience, a test score of 10 and an interview score of 10.

In [17]:
# The model was fitted on a DataFrame, so pass the query as a DataFrame with
# the same column names; a bare nested list makes newer scikit-learn versions
# warn that X has no valid feature names.
reg.predict(pd.DataFrame(
    [[12, 10, 10]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))

array([93747.79628651])

In [18]:
# Manual check of the prediction: dot(coefficients, features) + intercept.
# The values are read from the fitted model instead of being pasted in as
# literals, so the check always refers to the model currently bound to `reg`.
# Rounded to 2 decimals to keep the displayed value stable.
round(float(np.dot(reg.coef_, [12, 10, 10]) + reg.intercept_), 2)

93747.8

## Data Preprocessing: Fill NA values with the median value of a column.

In [19]:
# Work on an independent copy so the median-imputation experiment leaves `df` untouched.
df2 = df.copy(deep=True)

In [20]:
# Median of the test scores (pandas skips the missing value by default).
df2.loc[:, 'test_score(out of 10)'].median()

8.0

In [21]:
# Floor the column median so the imputed score is a whole number.
median_test_score = np.floor(
    df2['test_score(out of 10)'].median()
)
median_test_score

8.0

In [30]:
# Replace the missing test score with the floored median held in median_test_score.
df2['test_score(out of 10)'] = df2['test_score(out of 10)'].fillna(value=median_test_score)
df2

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


In [23]:
# Fit salary as a linear function of experience and the two scores,
# using the median-imputed frame. This rebinds `reg` to a new model.
reg = linear_model.LinearRegression()
reg.fit(
    X=df2[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']],
    y=df2['salary($)'],
)

LinearRegression()

In [24]:
# Fitted coefficients, one per feature column in the order passed to fit():
# experience, test score, interview score.
reg.coef_

array([2812.95487627, 1845.70596798, 2205.24017467])

In [25]:
# Fitted intercept (the constant term of the linear model).
reg.intercept_

17737.26346433771

### Find the salary of a person who has 2 yrs of experience, a test score of 7 and an interview score of 6.

In [26]:
# The model was fitted on a DataFrame, so pass the query as a DataFrame with
# the same column names; a bare nested list makes newer scikit-learn versions
# warn that X has no valid feature names.
reg.predict(pd.DataFrame(
    [[2, 7, 6]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))

array([49514.55604076])

In [27]:
# Manual check of the prediction: dot(coefficients, features) + intercept.
# The values are read from the fitted model instead of being pasted in as
# literals, so the check cannot drift out of sync when the model is refitted.
# Rounded to 2 decimals to keep the displayed value stable.
round(float(np.dot(reg.coef_, [2, 7, 6]) + reg.intercept_), 2)

49514.56

### Find the salary of a person who has 12 yrs of experience, a test score of 10 and an interview score of 10.

In [28]:
# The model was fitted on a DataFrame, so pass the query as a DataFrame with
# the same column names; a bare nested list makes newer scikit-learn versions
# warn that X has no valid feature names.
reg.predict(pd.DataFrame(
    [[12, 10, 10]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))

array([92002.18340611])

In [29]:
# Manual check of the prediction: dot(coefficients, features) + intercept.
# The values are read from the fitted model instead of being pasted in as
# literals, so the check cannot drift out of sync when the model is refitted.
# Rounded to 2 decimals to keep the displayed value stable.
round(float(np.dot(reg.coef_, [12, 10, 10]) + reg.intercept_), 2)

92002.18