### Prepare a model that finds the suitable salary for
- a worker with 2 yr of expr, 9 test scores and 6 interview scores
- a worker with 12 yr of expr, 10 test scores and 10 interview scores

In [2]:
import pandas as pd
from sklearn import linear_model

In [3]:
df = pd.read_csv('hiring.csv')
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [4]:
# we have some missing data points
# first for the test_score missing point... we can take its median

median = df['test_score(out of 10)'].median()
median

8.0

In [5]:
df['test_score(out of 10)'] = df['test_score(out of 10)'].fillna(median)

In [6]:
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


In [7]:
# now for the experience we can take the blank ones as 0
df.experience = df.experience.fillna("zero")

In [8]:
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


In [13]:
conda search word2number

Loading channels: ...working... done
# Name                       Version           Build  Channel             
word2number                      1.1            py_0  conda-forge         

Note: you may need to restart the kernel to use updated packages.


In [14]:
import sys
!conda install --yes --prefix {sys.prefix} word2number

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\vagee\anaconda3

  added / updated specs:
    - word2number


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-22.11.1              |   py39hcbf5309_1         908 KB  conda-forge
    python_abi-3.9             |           2_cp39           4 KB  conda-forge
    ruamel.yaml-0.17.21        |   py39hb82d6ee_1         169 KB  conda-forge
    ruamel.yaml.clib-0.2.6     |   py39h2bbff1b_1         101 KB
    word2number-1.1            |             py_0           8 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         1.2 MB

The following NEW packages will be INSTALLED:

  python_abi         conda-forge/win-64::python_abi-3.9-2_cp39 None
  rua



  current version: 22.9.0
  latest version: 22.11.1

Please update conda by running

    $ conda update -n base -c defaults conda




In [9]:
from word2number import w2n

In [10]:
w2n.word_to_num("eleven")

11

In [11]:
df.count().experience

8

In [12]:
df.experience.count()

8

In [14]:
df.experience = df.experience.apply(w2n.word_to_num)

ValueError: Type of input is not string! Please enter a valid number word (eg. 'two million twenty three thousand and forty nine')

In [15]:
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


In [16]:
model = linear_model.LinearRegression()

In [18]:
model.fit(df[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']].values, df['salary($)'])

LinearRegression()

In [19]:
model.predict([[2, 9, 6]])

array([53205.96797671])

In [21]:
model.predict([[12, 10, 10]])

array([92002.18340611])