In [1]:
#Data.csv

**Step 1: Importing the libraries**

In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [3]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

**Step 2: Importing dataset**

In [4]:
df = pd.read_csv('Data.csv')

**Step 3: Handling the missing data**

In [5]:
Salary_mean = round(df['Salary'].mean())
Age_mean = round(df['Age'].mean())
df['Salary'] = df['Salary'].fillna(value = Salary_mean)
df['Age'] = df['Age'].fillna(value = Age_mean)
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,63778.0,Yes
5,France,35.0,58000.0,Yes
6,Spain,39.0,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


**Step 4: Encoding categorical data**

In [6]:
le = LabelEncoder()

df['Country'] = le.fit_transform(df['Country'])

print(df)

   Country   Age   Salary Purchased
0        0  44.0  72000.0        No
1        2  27.0  48000.0       Yes
2        1  30.0  54000.0        No
3        2  38.0  61000.0        No
4        1  40.0  63778.0       Yes
5        0  35.0  58000.0       Yes
6        2  39.0  52000.0        No
7        0  48.0  79000.0       Yes
8        1  50.0  83000.0        No
9        0  37.0  67000.0       Yes


**Step 5: Creating a dummy variable**

In [7]:
df = pd.concat([df,pd.get_dummies(df['Purchased'])],axis = 1)


In [8]:
df = df.drop(['Purchased'],axis = 1)
df

Unnamed: 0,Country,Age,Salary,No,Yes
0,0,44.0,72000.0,1,0
1,2,27.0,48000.0,0,1
2,1,30.0,54000.0,1,0
3,2,38.0,61000.0,1,0
4,1,40.0,63778.0,0,1
5,0,35.0,58000.0,0,1
6,2,39.0,52000.0,1,0
7,0,48.0,79000.0,0,1
8,1,50.0,83000.0,1,0
9,0,37.0,67000.0,0,1


**Step 6: Splitting the datasets into training sets and Test sets**

In [9]:
x= df.drop('Salary',axis = 1)
y = df['Salary']

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.30,random_state = 0)
print(x_train,'\n',y_train,'\n',x_test,'\n',y_test)

   Country   Age  No  Yes
9        0  37.0   0    1
1        2  27.0   0    1
6        2  39.0   1    0
7        0  48.0   0    1
3        2  38.0   1    0
0        0  44.0   1    0
5        0  35.0   0    1 
 9    67000.0
1    48000.0
6    52000.0
7    79000.0
3    61000.0
0    72000.0
5    58000.0
Name: Salary, dtype: float64 
    Country   Age  No  Yes
2        1  30.0   1    0
8        1  50.0   1    0
4        1  40.0   0    1 
 2    54000.0
8    83000.0
4    63778.0
Name: Salary, dtype: float64


**Step 7: Feature Scaling**

In [10]:
MinMax = preprocessing.MinMaxScaler()
MinMax_x_train = MinMax.fit_transform(x_train)
MinMax_y_train = MinMax.fit_transform(y_train.values.reshape(-1,1))
MinMax_x_test =  MinMax.fit_transform(x_test)
MinMax_y_test =  MinMax.fit_transform(y_test.values.reshape(-1,1))
print('MinMax_x_train',MinMax_x_train,'\n','MinMax_y_train:',MinMax_y_train,'\n','MinMax_x_test',MinMax_x_test,'\n','MinMax_y_test',MinMax_y_test)

MinMax_x_train [[0.         0.47619048 0.         1.        ]
 [1.         0.         0.         1.        ]
 [1.         0.57142857 1.         0.        ]
 [0.         1.         0.         1.        ]
 [1.         0.52380952 1.         0.        ]
 [0.         0.80952381 1.         0.        ]
 [0.         0.38095238 0.         1.        ]] 
 MinMax_y_train: [[0.61290323]
 [0.        ]
 [0.12903226]
 [1.        ]
 [0.41935484]
 [0.77419355]
 [0.32258065]] 
 MinMax_x_test [[0.  0.  1.  0. ]
 [0.  1.  1.  0. ]
 [0.  0.5 0.  1. ]] 
 MinMax_y_test [[0.        ]
 [1.        ]
 [0.33717241]]


In [11]:
Scaler = preprocessing.StandardScaler()
scaler_x_train = Scaler.fit_transform(x_train)
scaler_y_train = Scaler.fit_transform(y_train.values.reshape(-1,1))
scaler_x_test =  Scaler.fit_transform(x_test)
scaler_y_test =  Scaler.fit_transform(y_test.values.reshape(-1,1))
print('Scaler_x_train',scaler_x_train,'\n','Scaler_y_train:',scaler_y_train,'\n','Scaler_x_test',scaler_x_test,'\n','Scaler_y_test',scaler_y_test)

Scaler_x_train [[-0.8660254  -0.20801257 -0.8660254   0.8660254 ]
 [ 1.15470054 -1.82588815 -0.8660254   0.8660254 ]
 [ 1.15470054  0.11556254  1.15470054 -1.15470054]
 [-0.8660254   1.57165056 -0.8660254   0.8660254 ]
 [ 1.15470054 -0.04622502  1.15470054 -1.15470054]
 [-0.8660254   0.92450033  1.15470054 -1.15470054]
 [-0.8660254  -0.53158769 -0.8660254   0.8660254 ]] 
 Scaler_y_train: [[ 0.44897083]
 [-1.41706417]
 [-1.0242147 ]
 [ 1.62751925]
 [-0.14030338]
 [ 0.94003267]
 [-0.43494049]] 
 Scaler_x_test [[ 0.         -1.22474487  0.70710678 -0.70710678]
 [ 0.          1.22474487  0.70710678 -0.70710678]
 [ 0.          0.         -1.41421356  1.41421356]] 
 Scaler_y_test [[-1.07299598]
 [ 1.33431358]
 [-0.2613176 ]]
