# In [0]:
# %% [code]
#Importing necessary libraries
#install.packages('caTools')
#install.packages('ggplot2')
library(caTools)
library(ggplot2)

# %% [code]
# Importing the dataset
# Read the CSV and keep only columns 3 to 5 in a single step; the rest of the
# script treats the first two kept columns as predictors and the third as the
# target.
dataset <- read.csv('../input/social-network-ads/Social_Network_Ads.csv')[3:5]

# %% [code]
# Encoding the target feature as factor
# Pass `levels` as a named argument (`=`). Writing `levels <- c(0, 1)` inside
# the call performs an assignment in the calling environment and only reaches
# the right parameter by positional coincidence.
dataset$Purchased <- factor(dataset$Purchased, levels = c(0, 1))

# %% [code]
# Splitting the dataset into the Training set and Test set
# Fix the RNG seed so the split is reproducible between runs.
set.seed(123)
# `SplitRatio` is passed by name; using `<-` here would create a stray global
# variable and rely on positional matching instead.
split <- sample.split(dataset$Purchased, SplitRatio = 0.75)
# Rows flagged TRUE form the training set, the remaining rows the test set.
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# %% [code]
# Feature Scaling
# Standardise the predictor columns (everything except column 3, the target).
# The centring and scaling values are learned from the training set only and
# then reused for the test set, so both sets live on the same scale the model
# is fitted on and no test-set statistics leak into preprocessing.
scaled_train <- scale(training_set[-3])
training_set[-3] <- scaled_train
test_set[-3] <- scale(
  test_set[-3],
  center = attr(scaled_train, 'scaled:center'),
  scale = attr(scaled_train, 'scaled:scale')
)

# %% [code]
# Fitting Logistic Regression to the Training set
# Model Purchased against all remaining columns with a binomial family.
# Arguments are passed by name with `=`; the previous `family <- binomial` and
# `data <- training_set` created global variables called `family` and `data`
# and only worked through positional matching.
classifier <- glm(
  formula = Purchased ~ .,
  family = binomial,
  data = training_set
)

# %% [code]
# Predicting the Test set results
# Ask for predictions on the response scale for the test predictors
# (target column 3 dropped). `newdata` is passed by name rather than through
# an in-call `<-` assignment, which would leak a global `newdata` variable.
prob_pred <- predict(classifier, newdata = test_set[-3], type = 'response')
# Convert predicted values into hard 0/1 labels using a 0.5 cut-off.
y_pred <- ifelse(prob_pred > 0.5, 1, 0)

# %% [code]
# Making the Confusion Matrix
# Cross-tabulate the actual labels against the predicted labels. `y_pred` is
# already a 0/1 label, so it is used directly instead of being thresholded a
# second time (which relabelled the columns FALSE/TRUE). Fixing the factor
# levels keeps the table 2 x 2 even when one class is never predicted.
cm <- table(
  Actual = test_set[, 3],
  Predicted = factor(y_pred, levels = c(0, 1))
)

# %% [code]
# Visualising the Training set results
# Build a dense grid (step 0.01) reaching one unit past the observed range of
# each of the first two columns, classify every grid point with the fitted
# model, then draw the observations, the class boundary and the coloured
# regions.
set <- training_set
x1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
x2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
# The grid columns are named after the predictors so predict() can match them.
grid_set <- setNames(expand.grid(x1, x2), c('Age', 'EstimatedSalary'))
prob_set <- predict(classifier, newdata = grid_set, type = 'response')
y_grid <- ifelse(prob_set > 0.5, 1, 0)
# Base scatter plot of the two predictor columns, sized to the grid extent.
plot(
  set[-3],
  main = 'LR Plot for Training set',
  xlab = 'Age',
  ylab = 'Estimated Salary',
  xlim = range(x1),
  ylim = range(x2)
)
# Contour of the 0/1 grid labels, added on top of the existing plot.
contour(
  x1,
  x2,
  matrix(as.numeric(y_grid), nrow = length(x1), ncol = length(x2)),
  add = TRUE
)
# Colour every grid point by its predicted class.
points(
  grid_set,
  pch = '.',
  col = ifelse(y_grid == 1, 'springgreen3', 'tomato')
)
# Redraw the observations on top, filled according to their actual class.
points(
  set,
  pch = 21,
  bg = ifelse(set[[3]] == 1, 'green4', 'red3')
)

# %% [code]
# Visualising the Test set results
# Same procedure as for the training plot, applied to the test observations:
# build a dense grid (step 0.01) reaching one unit past the observed range of
# the first two columns, classify each grid point, then draw the observations,
# the class boundary and the coloured regions.
set <- test_set
x1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
x2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
# The grid columns are named after the predictors so predict() can match them.
grid_set <- setNames(expand.grid(x1, x2), c('Age', 'EstimatedSalary'))
prob_set <- predict(classifier, newdata = grid_set, type = 'response')
y_grid <- ifelse(prob_set > 0.5, 1, 0)
# Base scatter plot of the two predictor columns, sized to the grid extent.
plot(
  set[-3],
  main = 'LR plot for Test set',
  xlab = 'Age',
  ylab = 'Estimated Salary',
  xlim = range(x1),
  ylim = range(x2)
)
# Contour of the 0/1 grid labels, added on top of the existing plot.
contour(
  x1,
  x2,
  matrix(as.numeric(y_grid), nrow = length(x1), ncol = length(x2)),
  add = TRUE
)
# Colour every grid point by its predicted class.
points(
  grid_set,
  pch = '.',
  col = ifelse(y_grid == 1, 'springgreen3', 'tomato')
)
# Redraw the observations on top, filled according to their actual class.
points(
  set,
  pch = 21,
  bg = ifelse(set[[3]] == 1, 'green4', 'red3')
)

# %% [code]
