***importing libraries***

In [76]:
import math
import numpy as np
import pandas as pd
import matplotlib as plt
from sklearn import linear_model
from word2number import w2n

***Reading our dataset in csv format***

In [77]:
df = pd.read_csv('Documents/salary.csv')
df

Unnamed: 0,experience,test_score,interview_score,salary
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [78]:
df.test_score

0     8.0
1     8.0
2     6.0
3    10.0
4     9.0
5     7.0
6     NaN
7     7.0
Name: test_score, dtype: float64

***find the median***

In [79]:
ts = math.floor(df.test_score.median())
ts

8

***fill the na values***

In [80]:
df.test_score = df.test_score.fillna(ts)
df.experience = df.experience.fillna('zero')
df

Unnamed: 0,experience,test_score,interview_score,salary
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


***Converting our exp values in words to numbers***

In [81]:
df.experience.dtype

dtype('O')

In [82]:
df["experience"] = df.experience.astype(str)
df.experience.dtype

dtype('O')

In [83]:
df['experience'] = df.experience.apply(w2n.word_to_num)
df

Unnamed: 0,experience,test_score,interview_score,salary
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


***Training our model***

In [84]:
reg = linear_model.LinearRegression()
reg.fit(df[['experience', 'test_score', 'interview_score']], df.salary)

In [85]:
reg.coef_

array([2812.95487627, 1845.70596798, 2205.24017467])

In [86]:
reg.intercept_

17737.263464337688

***Testing out our trained model***

In [89]:
reg.predict([[2, 9, 6]])



array([53205.96797671])

In [92]:
2812.95487627*2+1845.70596798*9+2205.24017467*6+17737.263464337688

53205.96797671769

In [90]:
reg.predict([[12, 10, 10]])



array([92002.18340611])

In [93]:
2812.95487627*12+1845.70596798*10+2205.24017467*10+17737.263464337688

92002.18340607767

***Saving the model to a file for later use or transfer***

In [96]:
import joblib

In [97]:
joblib.dump(reg, 'salary_joblib')

['salary_joblib']

In [98]:
sj = joblib.load('salary_joblib')

In [100]:
sj.predict([[12, 10, 10]])



array([92002.18340611])