# Saving an ML model:

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('dark_background')
from sklearn import linear_model
from word2number import w2n

In [2]:
def converter(x):
    """Convert a spelled-out number (e.g. ``'five'``) to its numeric value.

    Missing entries are treated as zero.

    Parameters
    ----------
    x : str or missing value
        A number written in words, or a missing marker (NaN, None, pd.NA).

    Returns
    -------
    int or float
        0 for a missing value, otherwise whatever ``w2n.word_to_num`` returns.
    """
    # pd.isna recognises every missing marker; an identity test against
    # np.nan only matches that one singleton and lets float('nan'), None
    # and pd.NA fall through to the word parser.
    if pd.isna(x):
        return 0
    return w2n.word_to_num(x)

In [3]:
# Load the raw hiring data from the CSV file in the working directory.
df = pd.read_csv('hiring.csv')
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


## Data Cleaning:

### Cleaning test_score(out of 10) column:

In [4]:
# Median of the available test scores (NaN entries are skipped by default).
test_scores = df['test_score(out of 10)']
med_score = test_scores.median()
med_score

8.0

In [5]:
# Fill the missing test score with the median value held in `med_score`.
score_col = 'test_score(out of 10)'
df[score_col] = df[score_col].fillna(med_score)
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


### Cleaning experience column:

In [6]:
# Turn the spelled-out experience values into numbers via `converter`.
experience_numeric = df['experience'].apply(converter)
df['experience'] = experience_numeric
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


## Training the ML model:

In [7]:
# Linear regression estimator created with its default settings.
reg = linear_model.LinearRegression()

In [8]:
# Fit salary as a function of the three candidate features.
feature_columns = [
    'experience',
    'test_score(out of 10)',
    'interview_score(out of 10)',
]
reg.fit(df[feature_columns], df['salary($)'])

LinearRegression()

In [9]:
# Predict salaries for two hypothetical candidates.
# The model was fitted on a DataFrame, so the inputs are passed as a DataFrame
# with the same column names; a bare nested list relies on implicit column
# order and triggers scikit-learn's "X does not have valid feature names"
# warning.
new_candidates = pd.DataFrame(
    [[2, 9, 6], [12, 10, 10]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
)
reg.predict(new_candidates)



array([53205.96797671, 92002.18340611])

## Saving the trained ML model:

### Using pickle module:

In [10]:
import pickle

In [12]:
# Serialize the fitted estimator to a binary file named 'hiring'.
with open('hiring', 'wb') as model_file:
    pickle.dump(reg, model_file)