In [None]:
# Dataset used in this notebook: Data.csv

**Step 1: Importing the libraries**

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

**Step 2: Importing the dataset**

In [3]:
# Load the raw data from the CSV file that sits next to the notebook,
# then echo the frame so the reader can inspect it.
df = pd.read_csv(filepath_or_buffer='Data.csv')
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes
5,France,35.0,58000.0,Yes
6,Spain,,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


In [5]:
# Print column dtypes and non-null counts; columns with fewer non-null
# entries than rows contain missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Country    10 non-null     object 
 1   Age        9 non-null      float64
 2   Salary     9 non-null      float64
 3   Purchased  10 non-null     object 
dtypes: float64(2), object(2)
memory usage: 448.0+ bytes


In [7]:
# Summary statistics for the numeric columns, transposed so that each
# column of df becomes one row of the summary.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,9.0,38.777778,7.693793,27.0,35.0,38.0,44.0,50.0
Salary,9.0,63777.777778,12265.579662,48000.0,54000.0,61000.0,72000.0,83000.0


**Step 3: Handling the missing data**

In [8]:
# Count missing values per column before imputation.
df.isna().sum()

Country      0
Age          1
Salary       1
Purchased    0
dtype: int64

In [9]:
# Fill missing numeric values with each column's mode (first most-frequent value).
#
# The result is assigned back to the column instead of calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form acts on an
# intermediate Series, raises a FutureWarning in pandas 2.x, and does not
# update `df` at all once Copy-on-Write is enabled.
#
# NOTE(review): every observed Age / Salary value in this dataset is unique,
# so mode()[0] is simply the smallest value (27.0 / 48000.0). The median or
# mean is usually a better choice for continuous columns - confirm intent.
df['Age'] = df['Age'].fillna(df['Age'].mode()[0])
df['Salary'] = df['Salary'].fillna(df['Salary'].mode()[0])

In [10]:
# Re-count missing values per column to confirm that none remain.
df.isna().sum()

Country      0
Age          0
Salary       0
Purchased    0
dtype: int64

**Step 4: Encoding categorical data**

In [12]:
# Encode the binary target as integers: 'Yes' -> 1, 'No' -> 0.
#
# The result is assigned back to the column instead of calling
# `df.Purchased.replace(..., inplace=True)`: the in-place call on an attribute
# access is a chained assignment that is deprecated in pandas 2.x and has no
# effect on `df` under Copy-on-Write.
# The explicit astype(int) makes the resulting dtype independent of replace()'s
# implicit object -> int downcasting, which pandas has deprecated.
df['Purchased'] = df['Purchased'].replace({'Yes': 1, 'No': 0}).astype(int)
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,0
1,Spain,27.0,48000.0,1
2,Germany,30.0,54000.0,0
3,Spain,38.0,61000.0,0
4,Germany,40.0,48000.0,1
5,France,35.0,58000.0,1
6,Spain,27.0,52000.0,0
7,France,48.0,79000.0,1
8,Germany,50.0,83000.0,0
9,France,37.0,67000.0,1


**Step 5: Creating dummy variables**

In [13]:
# One-hot encode 'Country' into Country_<value> indicator columns.
#
# dtype=int pins the indicators to 0/1 integers. Without it, pandas >= 2.0
# produces boolean columns (True/False), which would not match the 0/1 table
# this cell is expected to display.
df = pd.get_dummies(df, columns=['Country'], dtype=int)
df

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,44.0,72000.0,0,1,0,0
1,27.0,48000.0,1,0,0,1
2,30.0,54000.0,0,0,1,0
3,38.0,61000.0,0,0,0,1
4,40.0,48000.0,1,0,1,0
5,35.0,58000.0,1,1,0,0
6,27.0,52000.0,0,0,0,1
7,48.0,79000.0,1,1,0,0
8,50.0,83000.0,0,0,1,0
9,37.0,67000.0,1,1,0,0


**Step 6: Splitting the dataset into training and test sets**

In [14]:
# Separate the predictors from the target column.
X_FEATURE = df.drop(columns='Purchased')
Y_LABEL = df['Purchased']

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_FEATURE,
    Y_LABEL,
    test_size=0.3,
    random_state=42,
)

In [15]:
# Show (rows, columns) for the training and test feature matrices.
(X_train.shape, X_test.shape)

((7, 5), (3, 5))

**Step 7: Feature Scaling**

In [18]:
# Standardization
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both splits so the test data does not influence the
# learned statistics.
func = StandardScaler().fit(X_train)
transf_xtrain, transf_xtest = func.transform(X_train), func.transform(X_test)
transf_xtrain

array([[ 0.92188302,  0.96404767,  1.15470054, -0.63245553, -0.63245553],
       [ 1.50853585,  1.62937634,  1.15470054, -0.63245553, -0.63245553],
       [-1.13140189, -0.74679749, -0.8660254 ,  1.58113883, -0.63245553],
       [-0.10475943,  0.4888129 ,  1.15470054, -0.63245553, -0.63245553],
       [ 0.33523019, -1.31707921, -0.8660254 ,  1.58113883, -0.63245553],
       [ 0.04190377, -0.08146882, -0.8660254 , -0.63245553,  1.58113883],
       [-1.57139151, -0.9368914 , -0.8660254 , -0.63245553,  1.58113883]])

In [19]:
# Normalization
# NOTE: this cell rebinds `func`, `transf_xtrain` and `transf_xtest`, so the
# standardized arrays produced by the previous cell are no longer reachable
# once it has run.
func = MinMaxScaler(feature_range=(0, 1))  # (0, 1) is also the default range
transf_xtrain = func.fit_transform(X_train)  # learn min/max from the training split only
transf_xtest = func.transform(X_test)        # reuse the training min/max on the test split
transf_xtrain

array([[0.80952381, 0.77419355, 1.        , 0.        , 0.        ],
       [1.        , 1.        , 1.        , 0.        , 0.        ],
       [0.14285714, 0.19354839, 0.        , 1.        , 0.        ],
       [0.47619048, 0.61290323, 1.        , 0.        , 0.        ],
       [0.61904762, 0.        , 0.        , 1.        , 0.        ],
       [0.52380952, 0.41935484, 0.        , 0.        , 1.        ],
       [0.        , 0.12903226, 0.        , 0.        , 1.        ]])