**Step 1: Importing the libraries**

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


**Step 2: Importing dataset**

In [13]:
df = pd.read_csv("Data.csv")

In [14]:
df.head()

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes


In [38]:
df.Salary.isnull().sum()

1

In [39]:
x = df.iloc[:,:-1].values
y = df.iloc[:,3].values
x,y

(array([['France', 44.0, 72000.0],
        ['Spain', 27.0, 48000.0],
        ['Germany', 30.0, 54000.0],
        ['Spain', 38.0, 61000.0],
        ['Germany', 40.0, nan],
        ['France', 35.0, 58000.0],
        ['Spain', nan, 52000.0],
        ['France', 48.0, 79000.0],
        ['Germany', 50.0, 83000.0],
        ['France', 37.0, 67000.0]], dtype=object),
 array(['No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes'],
       dtype=object))

**Step 3: Handling the missing data**

In [40]:
from sklearn.impute import SimpleImputer
imputer= SimpleImputer(missing_values=np.nan, strategy='mean')

#Fitting imputer object to the independent variables x.   
imputerimputer= imputer.fit(x[:, 1:3])  

#Replacing missing data with the calculated mean value  
x[:, 1:3]= imputer.transform(x[:, 1:3])  




SimpleImputer()

**Step 4: Encoding categorical data**

In [41]:
from sklearn.preprocessing import LabelEncoder  
label_encoder_x= LabelEncoder()  
x[:, 0]= label_encoder_x.fit_transform(x[:, 0])  
x

array([[0, 44.0, 72000.0],
       [2, 27.0, 48000.0],
       [1, 30.0, 54000.0],
       [2, 38.0, 61000.0],
       [1, 40.0, 63777.77777777778],
       [0, 35.0, 58000.0],
       [2, 38.77777777777778, 52000.0],
       [0, 48.0, 79000.0],
       [1, 50.0, 83000.0],
       [0, 37.0, 67000.0]], dtype=object)

**Step 5: Creating a dummy variable**

In [42]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder  
label_encoder_x= LabelEncoder()  
x[:, 0]= label_encoder_x.fit_transform(x[:, 0])  

#Encoding for dummy variables  
onehot_encoder= OneHotEncoder()    
x= onehot_encoder.fit_transform(x).toarray()  
x

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0.,

In [43]:
labelencoder_y= LabelEncoder()  
y= labelencoder_y.fit_transform(y)  
y

array([0, 1, 0, 0, 1, 1, 0, 1, 0, 1])

**Step 6: Splitting the datasets into training sets and Test sets**

In [31]:
from sklearn.model_selection import train_test_split  
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.2, random_state=0)  

**Step 7: Feature Scaling**

In [33]:
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()  
x_train= st_x.fit_transform(x_train) 
x_test= st_x.transform(x_test)

In [35]:
x_train,x_test

(array([[-1.        ,  2.64575131, -0.77459667, -0.37796447,  0.        ,
         -0.37796447, -0.37796447, -0.37796447, -0.37796447,  2.64575131,
         -0.37796447, -0.37796447,  0.        , -0.37796447, -0.37796447,
          0.        , -0.37796447, -0.37796447,  2.64575131, -0.37796447,
         -0.37796447, -0.37796447,  0.        ],
        [ 1.        , -0.37796447, -0.77459667, -0.37796447,  0.        ,
         -0.37796447,  2.64575131, -0.37796447, -0.37796447, -0.37796447,
         -0.37796447, -0.37796447,  0.        , -0.37796447, -0.37796447,
          0.        , -0.37796447, -0.37796447, -0.37796447,  2.64575131,
         -0.37796447, -0.37796447,  0.        ],
        [-1.        , -0.37796447,  1.29099445,  2.64575131,  0.        ,
         -0.37796447, -0.37796447, -0.37796447, -0.37796447, -0.37796447,
         -0.37796447, -0.37796447,  0.        ,  2.64575131, -0.37796447,
          0.        , -0.37796447, -0.37796447, -0.37796447, -0.37796447,
         -0.37