# 1. Importing necessary libraries

In [1]:
!pip install word2number

Collecting word2number
  Downloading word2number-1.1.zip (9.7 kB)
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py): started
  Building wheel for word2number (setup.py): finished with status 'done'
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5583 sha256=7acb8cc8d4742556ab815229640fc5b843d5928aad05aaaab0ef1e54e0d45c12
  Stored in directory: c:\users\hp\appdata\local\pip\cache\wheels\a0\4a\5b\d2f2df5c344ddbecb8bea759872c207ea91d93f57fb54e816e
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1


In [107]:
from word2number import w2n

import pandas as pd
from sklearn.linear_model import LinearRegression
from pickle import dump,load

import warnings
# Suppress every warning for the rest of the session to keep cell outputs clean.
# NOTE(review): this also hides library warnings that may point at real problems
# (e.g. deprecations or input-validation notices) -- consider narrowing the filter.
warnings.filterwarnings('ignore')

# 2. Data Collection

In [76]:
# Location of the raw hiring data set (CSV hosted on GitHub).
HIRING_CSV_URL = 'https://raw.githubusercontent.com/codebasics/py/master/ML/2_linear_reg_multivariate/Exercise/hiring.csv'

# Download the CSV into a DataFrame and display it.
hiring_data = pd.read_csv(HIRING_CSV_URL)
hiring_data

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


# 3. Data Understanding

In [77]:
# Show the dtype of every column to spot the ones that are not numeric yet.
hiring_data.dtypes

experience                     object
test_score(out of 10)         float64
interview_score(out of 10)      int64
salary($)                       int64
dtype: object

In [78]:
# Count the missing entries in each column to see which ones need imputation.
missing_per_column = hiring_data.isnull().sum()
missing_per_column

experience                    2
test_score(out of 10)         1
interview_score(out of 10)    0
salary($)                     0
dtype: int64

# 4. Data Preprocessing

## 4.1 Filling NaN values

In [79]:
# Replace missing experience with the word 'zero' so that every entry in the
# column is a number word.
experience_filled = hiring_data['experience'].fillna('zero')
hiring_data['experience'] = experience_filled
hiring_data

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [83]:
# Impute missing test scores with the median of the scores that are present.
score_column = 'test_score(out of 10)'
median_test_score = hiring_data[score_column].median()
hiring_data[score_column] = hiring_data[score_column].fillna(median_test_score)

## 4.2 Converting number words to integers with the `word_to_num` function from the word2number module

In [80]:
# Turn number words ('five', 'eleven', ...) into integers.
# word_to_num only accepts strings, so values that are already numeric are
# passed through unchanged; this keeps the cell safe to re-run after the
# column has been converted.
hiring_data['experience'] = hiring_data['experience'].apply(
    lambda value: w2n.word_to_num(value) if isinstance(value, str) else value
)

In [84]:
# Display the frame to check the result of the preprocessing steps.
hiring_data

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


In [85]:
# Re-count missing entries per column; every count should now be zero.
remaining_missing = hiring_data.isnull().sum()
remaining_missing

experience                    0
test_score(out of 10)         0
interview_score(out of 10)    0
salary($)                     0
dtype: int64

# 5. Model Building

In [90]:
# Separate the features from the target. y is kept as a single-column
# DataFrame (double brackets), not a Series.
target_column = 'salary($)'
X = hiring_data.drop(columns=[target_column])
y = hiring_data[[target_column]]

# 6. Model Training

In [92]:
# Linear regression estimator created with scikit-learn's default settings.
lin_model = LinearRegression()

In [93]:
# Fit on X and y as built from the full hiring_data frame; no rows are held
# out for evaluation.
lin_model.fit(X,y)

LinearRegression()

In [94]:
# Learned weights, one per column of X (in X's column order). The array is
# 2-D because y was passed as a single-column DataFrame.
lin_model.coef_

array([[2812.95487627, 1845.70596798, 2205.24017467]])

In [95]:
# Learned bias term: the predicted salary when every feature is zero.
lin_model.intercept_

array([17737.26346434])

# 7. Model Testing

In [103]:
# Spot-check one candidate: experience 5, test score 8, interview score 10.
# The row is wrapped in a DataFrame carrying X's column names because the model
# was fitted on a DataFrame; a bare list makes scikit-learn warn that the input
# has no feature names.
lin_model.predict(pd.DataFrame([[5,8.0,10]], columns=X.columns))

array([[68620.08733624]])

# 8. Model Deployment

In [105]:
# Serialise the fitted model to disk. The with-block guarantees the file is
# flushed and closed even if pickling fails.
with open('hiring_intelligence.pkl','wb') as model_file:
    dump(lin_model,model_file)

In [108]:
# Restore the model from disk; the with-block closes the file handle afterwards.
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# come from a trusted source.
with open('hiring_intelligence.pkl','rb') as model_file:
    model_loaded = load(model_file)

### Predict salary for future candidates
* 2 years of experience, test score 9, interview score 6
* 12 years of experience, test score 10, interview score 10

In [110]:
# One row per candidate, in the order (experience, test score, interview score).
# The rows are wrapped in a DataFrame carrying X's column names because the
# model was fitted on a DataFrame; bare lists make scikit-learn warn that the
# input has no feature names.
future_candidates = pd.DataFrame([[2,9,6],[12,10,10]], columns=X.columns)
model_loaded.predict(future_candidates)

array([[53205.96797671],
       [92002.18340611]])