# Importing Libraries
We begin by importing the necessary libraries for numerical operations and data manipulation.

In [None]:
import numpy as np
import pandas as pd

# Loading the Dataset
Here, we load the dataset from a CSV file. In the next section we separate the features (independent variables) from the target (dependent variable).

In [None]:
# Read datasets/ShopSellData.csv into a DataFrame. The path is relative, so it
# resolves against the current working directory.
dataset = pd.read_csv("datasets/ShopSellData.csv")

# Splitting the inputs and outputs
`dataset.iloc` can be used to index into rows and columns using integers.

The first parameter is rows and the second parameter is columns.

`dataset.iloc[:, :-1]` selects all rows and every column from the first up to, but not including, the last one.

If the column names are known, we can also use `dataset.loc` to index by column name instead of by integer position.

Note that slicing by column name with `.loc` is inclusive: the end column is part of the result, unlike integer slicing with `.iloc`, which excludes the end position.

In [None]:
# Feature matrix: every row, columns from the first through "Salary".
# Label slices with .loc include the end label, so "Salary" itself is kept.
# .values converts the selection to a NumPy array.
x = dataset.loc[:, :"Salary"].values

In [None]:
# Inspect the extracted feature array.
x

In [None]:
# Positional slice: all rows, every column except the last. It matches the
# .loc slice used for x only if "Salary" is the second-to-last column.
dataset.iloc[:, :-1]

In [None]:
# Target vector: all rows of the column at integer position 3 (the fourth column).
# NOTE(review): presumably this is the 'Purchased' column — confirm against the CSV header.
y = dataset.iloc[:, 3].values

In [None]:
# Show the same column as a pandas Series (keeps the row index and column name).
dataset.iloc[:, 3]

# Viewing the Dataset
We display the entire dataset, then preview its first and last rows to understand its structure.

In [None]:
# Render the whole DataFrame.
dataset

In [None]:
# First rows only; head() returns 5 rows when called without an argument.
dataset.head()

In [None]:
# First two rows.
dataset.head(2)

In [None]:
# Last two rows.
dataset.tail(2)

# Label Encoding
Label Encoding is used to convert categorical data into numeric form. Here, we encode the 'Country' column.

`LabelEncoder` is a class we import from the scikit-learn library (imported as `sklearn`).

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder instance used for the first feature column.
label_encode_x = LabelEncoder()

In [None]:
# Overwrite the first column of x in place with integer class codes
# (0..n_classes-1) learned from that same column.
x[:, 0] = label_encode_x.fit_transform(x[:, 0])

In [None]:
# Check the feature array after label encoding the first column.
x

# One Hot Encoding
One Hot Encoding is used to create dummy variables for categorical data. This step ensures that the encoded categorical data does not imply any ordinal relationship.

In [None]:
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Encoder with default settings; by default its transform output is a sparse matrix.
onehotencoder = OneHotEncoder()

In [None]:
# One-hot encode the label-encoded first column. The encoder expects 2-D input,
# hence the reshape to a single-column array; .toarray() densifies the sparse result.
country_dummies = onehotencoder.fit_transform(x[:, 0].reshape(-1, 1)).toarray()
# Swap the single integer-coded column for its dummy columns so the later split
# and scaling steps see the one-hot features. Without this assignment the encoded
# matrix would only be displayed and x would keep the ordinal codes.
# Run this cell once per fresh x: re-running it would encode the first dummy column again.
x = np.concatenate((country_dummies, x[:, 1:]), axis=1)
x

# Encoding the Target Variable
Similar to the feature encoding, we encode the target variable 'Purchased' to convert it into numeric form.

In [None]:
# Separate LabelEncoder instance for the target vector.
label_encode_y = LabelEncoder()

In [None]:
# Replace y with integer class codes (0..n_classes-1) learned from its own values.
y = label_encode_y.fit_transform(y)

In [None]:
# Check the encoded target.
y

# Splitting the Dataset
We split the dataset into training and test sets. This allows us to train our model on one set of data and test it on another to evaluate its performance.

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows as the test set; random_state=0 fixes the shuffle
# so the split is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

In [None]:
# Inspect the training features.
x_train

In [None]:
# Inspect the held-out test features.
x_test

In [None]:
# Inspect the training targets.
y_train

In [None]:
# Inspect the held-out test targets.
y_test

# Feature Scaling
Feature scaling is performed to standardize the range of independent variables. It ensures that each feature contributes equally to the model.

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Scaler for the feature columns; once fitted it standardizes each column
# to zero mean and unit variance.
sc_x = StandardScaler()

In [None]:
# Learn the per-column mean and standard deviation from the training features
# only, then scale them with those statistics.
x_train_scaled = sc_x.fit_transform(x_train)

In [None]:
# Scale the test features with the statistics already learned from the training
# set (transform only, no refit), so the scaler never sees test data.
x_test_scaled = sc_x.transform(x_test)

In [None]:
# Inspect the scaled training features.
x_train_scaled

In [None]:
# Inspect the scaled test features.
x_test_scaled