In [1]:
# import package/ library
import pandas as pd
import numpy as np 
from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data = pd.read_csv("hiring.csv")

# show total row and column of the dataframe or dataset
data.shape

(8, 4)

In [3]:
# show top five row
data.head()

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000


In [4]:
# show bottom five row
data.tail()

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [5]:
# check for any null/nan values in each column
data.isna().sum()

experience                    2
test_score(out of 10)         1
interview_score(out of 10)    0
salary($)                     0
dtype: int64

In [6]:
# fill the null dataset with mean value in 2nd column and stored the changes in new dataframe

df = data.copy()
df['test_score(out of 10)'] = data['test_score(out of 10)'].fillna(data['test_score(out of 10)'].mean())

df['test_score(out of 10)'] = [np.rint(x) for x in df['test_score(out of 10)']]
df.tail()

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


In [7]:
# fill the first column with the mode
df['experience'] = data['experience'].fillna(data['experience'].mode())
df.head()

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,eleven,8.0,9,50000
1,five,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000


In [8]:
# check datatypes of each column
df.dtypes

experience                     object
test_score(out of 10)         float64
interview_score(out of 10)      int64
salary($)                       int64
dtype: object

In [9]:
# since computer understand only numbers we have to convert column 1 strings to numbers using a specific library call word2numb
from word2number import w2n

# change the string to value ex. one --> 1 , two --> 2, three--> 3 and so on
df['experience'] = df["experience"].apply(lambda x: w2n.word_to_num(x))
df['experience']

0    11
1     5
2     5
3     2
4     7
5     3
6    10
7    11
Name: experience, dtype: int64

In [10]:
# show all the columns name
df.columns

Index(['experience', 'test_score(out of 10)', 'interview_score(out of 10)',
       'salary($)'],
      dtype='object')

In [11]:
# built a linear regression model for multi variable 
lrg = LinearRegression()
lrg.fit(df[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']].values, df['salary($)'].values)

In [12]:
df.head()

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,11,8.0,9,50000
1,5,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000


In [13]:
lrg.predict(df.drop(['salary($)'],axis=1).values)

array([68341.54242478, 59735.87190424, 59480.1971416 , 61028.2137706 ,
       62321.53517466, 60436.05624243, 65675.07115146, 66981.51219023])

In [14]:
# lets put the predicted value in the dataframe
df['predicted_price'] = lrg.predict(df.drop(['salary($)'],axis=1).values)

In [15]:
df.head()

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($),predicted_price
0,11,8.0,9,50000,68341.542425
1,5,8.0,6,45000,59735.871904
2,5,6.0,7,60000,59480.197142
3,2,10.0,10,65000,61028.213771
4,7,9.0,6,70000,62321.535175


## How to use word2number library to convert word to number

In [16]:
from word2number import w2n
import pandas as pd 

In [17]:
d = pd.DataFrame({
    'number':['one', 'two', 'three', 'four', 'ten', 'twenty one', 'thirty-one']
})

In [18]:
d

Unnamed: 0,number
0,one
1,two
2,three
3,four
4,ten
5,twenty one
6,thirty-one


In [19]:
d['nm'] = d['number'].apply(w2n.word_to_num)
d

Unnamed: 0,number,nm
0,one,1
1,two,2
2,three,3
3,four,4
4,ten,10
5,twenty one,21
6,thirty-one,31


In [20]:
d['nms'] = d['number'].apply(lambda x: w2n.word_to_num(x))
d

Unnamed: 0,number,nm,nms
0,one,1,1
1,two,2,2
2,three,3,3
3,four,4,4
4,ten,10,10
5,twenty one,21,21
6,thirty-one,31,31
