In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# !pip install word2number
from word2number import w2n

# Data Preprocessing

In [2]:
# Load the raw hiring records from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="hiring.csv")

In [3]:
# Drop the "(out of 10)" / "($)" suffixes so the columns are plain identifiers
# that later cells can reach with attribute access (e.g. df.test_score).
# The result is reassigned rather than mutated with `inplace=True`.
df = df.rename(
    columns={
        "test_score(out of 10)": "test_score",
        "interview_score(out of 10)": "interview_score",
        "salary($)": "salary",
    }
)

In [4]:
# Show the frame after the column rename.
df

Unnamed: 0,experience,test_score,interview_score,salary
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [5]:
# Per-column count of missing values.
df.isna().sum()

experience         2
test_score         1
interview_score    0
salary             0
dtype: int64

In [6]:
# Replace missing experience entries with the word "zero" so the column
# contains only strings before it is converted to numbers.
df["experience"] = df["experience"].fillna("zero")

In [7]:
# Convert the spelled-out numbers ("five", "eleven", ...) into integers.
df["experience"] = df["experience"].apply(w2n.word_to_num)
df

Unnamed: 0,experience,test_score,interview_score,salary
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,,7,72000
7,11,7.0,8,80000


In [8]:
# Impute the missing test score with the column mean rounded down to a whole
# score. The value is a floored mean, not a median, so the variable is named
# for what it actually holds.
test_score_fill = math.floor(df["test_score"].mean())
df["test_score"] = df["test_score"].fillna(test_score_fill)
df

Unnamed: 0,experience,test_score,interview_score,salary
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,7.0,7,72000
7,11,7.0,8,80000


# Model Training and Prediction

In [12]:
# Predictor columns, in the order they are selected from the frame.
features = [
    "experience",
    "test_score",
    "interview_score",
]
# Kept as a one-element list so that df[target] is a 2-D selection.
target = ["salary"]

In [17]:
# Feature matrix with one column per entry in `features`, and the target as a
# single-column 2-D array (because `target` is a list).
X = df[features].to_numpy()
y = df[target].to_numpy()
print(X, y)

[[ 0.  8.  9.]
 [ 0.  8.  6.]
 [ 5.  6.  7.]
 [ 2. 10. 10.]
 [ 7.  9.  6.]
 [ 3.  7. 10.]
 [10.  7.  7.]
 [11.  7.  8.]] [[50000]
 [45000]
 [60000]
 [65000]
 [70000]
 [62000]
 [72000]
 [80000]]


In [18]:
from sklearn import linear_model

# Fit a linear regression of the target on the selected features.
# `fit` returns the estimator itself, so `model` and `reg` name the same object.
reg = linear_model.LinearRegression()
reg.fit(X, y)
model = reg

In [19]:
# Fitted weights; the columns follow the order of `features`.
coefficient = model.coef_
print(coefficient)

[[2922.26901502 2221.30909959 2147.48256637]]


In [21]:
# Fitted bias (constant) term of the regression.
intercept = model.intercept_
print(intercept)

[14992.65144669]


In [22]:
# R^2 evaluated on the same X, y the model was fitted on (in-sample score).
model.score(X, y)

0.9770139014273139

In [26]:
# Single query row, in the same column order as `features`:
# experience=2, test_score=9, interview_score=6.
x_pred = np.array([[2, 9, 6]])  # written as a 2-D literal: shape (1, 3)
print(x_pred)

model.predict(x_pred)

[[2 9 6]]


array([[53713.86677124]])