# Importing Libraries


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans

# Loading and Preparing Dataset

In [3]:
# Read the advertising dataset from its CSV export.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run as-is on a machine that has the file at this exact location.
csv_path = r"C:\Users\ahmed\Downloads\Business Intelligence\advertising.csv"
data = pd.read_csv(csv_path)

# Show the first rows as a quick sanity check of the load.
data.head()

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Ad Topic Line,City,Male,Country,Timestamp,Clicked on Ad
0,68.95,35,61833.9,256.09,Cloned 5thgeneration orchestration,Wrightburgh,0,Tunisia,27/03/2016 0:53,0
1,80.23,31,68441.85,193.77,Monitored national standardization,West Jodi,1,Nauru,04/04/2016 1:39,0
2,69.47,26,59785.94,236.5,Organic bottom-line service-desk,Davidton,0,San Marino,13/03/2016 20:35,0
3,74.15,29,54806.18,245.89,Triple-buffered reciprocal time-frame,West Terrifurt,1,Italy,10/01/2016 2:31,0
4,68.37,35,73889.99,225.58,Robust logistical utilization,South Manuel,0,Iceland,03/06/2016 3:36,0


In [4]:
# Fraction of missing values per column (0.0 means the column is complete).
data.isna().mean()

Daily Time Spent on Site    0.0
Age                         0.0
Area Income                 0.0
Daily Internet Usage        0.0
Ad Topic Line               0.0
City                        0.0
Male                        0.0
Country                     0.0
Timestamp                   0.0
Clicked on Ad               0.0
dtype: float64

In [5]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Daily Time Spent on Site  1000 non-null   float64
 1   Age                       1000 non-null   int64  
 2   Area Income               1000 non-null   float64
 3   Daily Internet Usage      1000 non-null   float64
 4   Ad Topic Line             1000 non-null   object 
 5   City                      1000 non-null   object 
 6   Male                      1000 non-null   int64  
 7   Country                   1000 non-null   object 
 8   Timestamp                 1000 non-null   object 
 9   Clicked on Ad             1000 non-null   int64  
dtypes: float64(3), int64(3), object(4)
memory usage: 78.2+ KB


In [6]:
# Pairwise correlations between the numeric columns, used to see which
# features are most strongly correlated with the outcome variable
# ("Clicked on Ad").
# The text columns are excluded explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently in DataFrame.corr() and raises instead.
numeric_data = data.select_dtypes(include=["number", "bool"])
correlation_matrix = numeric_data.corr().round(2)
print(correlation_matrix)

                          Daily Time Spent on Site   Age  Area Income  \
Daily Time Spent on Site                      1.00 -0.33         0.31   
Age                                          -0.33  1.00        -0.18   
Area Income                                   0.31 -0.18         1.00   
Daily Internet Usage                          0.52 -0.37         0.34   
Male                                         -0.02 -0.02         0.00   
Clicked on Ad                                -0.75  0.49        -0.48   

                          Daily Internet Usage  Male  Clicked on Ad  
Daily Time Spent on Site                  0.52 -0.02          -0.75  
Age                                      -0.37 -0.02           0.49  
Area Income                               0.34  0.00          -0.48  
Daily Internet Usage                      1.00  0.03          -0.79  
Male                                      0.03  1.00          -0.04  
Clicked on Ad                            -0.79 -0.04           1.00 

In [7]:
def normalize(df, features_list):
    """Min-max scale the given columns of ``df`` into the [0, 1] range.

    Each listed column is transformed as ``(x - min) / (max - min)`` using that
    column's own minimum and maximum. Columns not listed are copied unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    features_list : iterable of str
        Names of the columns to rescale.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the listed columns rescaled.

    Notes
    -----
    A constant column (max == min) has a zero range; it is mapped to 0.0
    instead of dividing 0 by 0, which would fill the column with NaN.
    """
    result = df.copy()
    for feature_name in features_list:
        column = df[feature_name]
        min_value = column.min()
        value_range = column.max() - min_value
        if value_range == 0:
            # Zero spread: every value equals the minimum, so the scaled value is 0.
            result[feature_name] = 0.0
        else:
            result[feature_name] = (column - min_value) / value_range
    return result

In [8]:
# Rescale the continuous features to the [0, 1] range; other columns are untouched.
# NOTE(review): the min/max are computed on the full dataset, i.e. before the
# train/test split performed further down, so test rows influence the scaling.
columns_to_scale = [
    'Daily Time Spent on Site',
    'Age',
    'Area Income',
    'Daily Internet Usage',
]
data = normalize(data, columns_to_scale)

In [9]:
# Remove the text columns and the 'Male' flag, which are not used as model inputs.
# Reassigning the result (rather than inplace=True) together with
# errors="ignore" keeps this cell safe to re-run: a second execution no longer
# raises KeyError because the columns are already gone.
data = data.drop(
    columns=['Ad Topic Line', 'City', 'Country', 'Timestamp', 'Male'],
    errors="ignore",
)

In [10]:
# Preview the first rows of the frame as it currently stands.
data.head()

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Clicked on Ad
0,0.617882,0.380952,0.730472,0.916031,0
1,0.809621,0.285714,0.831375,0.538746,0
2,0.626721,0.166667,0.6992,0.797433,0
3,0.706272,0.238095,0.62316,0.85428,0
4,0.608023,0.380952,0.914568,0.731323,0


In [11]:
# Feature matrix: the four numeric predictors.
feature_columns = [
    'Daily Time Spent on Site',
    'Age',
    'Area Income',
    'Daily Internet Usage',
]
X = data[feature_columns]

# Target vector: the 'Clicked on Ad' label column.
y = data['Clicked on Ad']

# Logistic Regression Model Building

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [13]:
# Fit a logistic regression with scikit-learn's default settings on the training split.
logistic_regression = LogisticRegression().fit(X_train, y_train)

# Echo the fitted estimator as the cell output.
logistic_regression

LogisticRegression()

# Prediction

In [14]:
# Predicted class labels for the held-out rows (logistic regression).
y_pred = logistic_regression.predict(X_test)

In [15]:
# Confusion matrix for the logistic regression
# (rows: actual class, columns: predicted class).
confusion_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confusion_matrix)

[[97  2]
 [ 9 92]]


In [16]:
# Report accuracy, precision and recall of the logistic-regression predictions.
for label, score_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(label, score_fn(y_test, y_pred))

Accuracy: 0.945
Precision: 0.9787234042553191
Recall: 0.9108910891089109


# Building & Evaluating Decision Tree Model

In [17]:
# Fit a decision tree on the training split.
# random_state is pinned (same seed as the train/test split) because the tree
# builder randomly permutes features when searching for splits; without a seed
# the fitted tree, and therefore the reported metrics, can vary between runs.
decision_tree = DecisionTreeClassifier(random_state=5)
decision_tree.fit(X_train, y_train)

DecisionTreeClassifier()

In [18]:
# Predicted class labels for the held-out rows (decision tree).
# Note the capital "Y": this is a different variable from the logistic
# regression's `y_pred`.
Y_pred = decision_tree.predict(X_test)


In [19]:
# Confusion matrix for the decision tree
# (rows: actual class, columns: predicted class).
Confusion_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=Y_pred)
print(Confusion_matrix)

[[93  6]
 [ 9 92]]


In [20]:
# Report accuracy, precision and recall of the decision-tree predictions.
for label, score_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(label, score_fn(y_test, Y_pred))

Accuracy: 0.925
Precision: 0.9387755102040817
Recall: 0.9108910891089109


# Building & Evaluating SVM Model

In [21]:
# Fit a support vector classifier with scikit-learn's default settings.
svc_model = SVC().fit(X_train, y_train)

# Echo the fitted estimator as the cell output.
svc_model

SVC()

In [22]:
# Predicted class labels for the held-out rows (SVM).
predictions = svc_model.predict(X_test)

In [23]:
# Confusion matrix for the SVM
# (rows: actual class, columns: predicted class).
Confusion_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=predictions)
print(Confusion_matrix)

[[96  3]
 [ 7 94]]


In [24]:
# Report accuracy, precision and recall of the SVM predictions.
for label, score_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(label, score_fn(y_test, predictions))

Accuracy: 0.95
Precision: 0.9690721649484536
Recall: 0.9306930693069307
