# Importing Libraries

In [48]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Data Pre-processing

In [49]:
# Read the raw ads dataset from the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

In [50]:
# Replace the Gender strings with integer category codes.
# Codes follow the sorted category order: 'Female' -> 0, 'Male' -> 1.
data['Gender'] = data['Gender'].astype('category').cat.codes

In [51]:
# 'User ID' is only an identifier, so remove it before modelling.
data = data.drop(columns=['User ID'])

In [52]:
# Feature Scaling
# Rescale EstimatedSalary to the [0, 1] range; Gender and Age are left as they are.
# NOTE(review): the scaler is fitted on every row before the train/test split
# further down, so the min/max it learns also come from rows that end up in the
# test set. Consider fitting it on the training rows only.
min_max_scaler = MinMaxScaler()
salary_scaled = min_max_scaler.fit_transform(data[['EstimatedSalary']])
data[['EstimatedSalary']] = salary_scaled
data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,0.02963,0
1,1,35,0.037037,0
2,0,26,0.207407,0
3,0,27,0.311111,0
4,1,19,0.451852,0


In [53]:
# Features: every column except the last one. Target: the last column.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [54]:
# Splitting the dataset into the Training set and Test set
# 25% of the rows are held out for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=101
)
# Show the shape of each part, in the order: X_train, y_train, X_test, y_test.
for part in (X_train, y_train, X_test, y_test):
    print(part.shape)

(300, 3)
(300,)
(100, 3)
(100,)


# Random Forest

## Training and Testing the data

In [55]:
# Random forests are stochastic (bootstrap samples and random feature subsets),
# so seed the model the same way as the train/test split and the AdaBoost model.
# Without a seed, Restart & Run All gives different trees and scores on every run;
# the scores recorded in this notebook came from an unseeded run and may shift slightly.
model = RandomForestClassifier(random_state=101)
# fit the model with the training data
model.fit(X_train, y_train)
# number of trees used (the library default, since n_estimators is not set here)
print('Number of Trees used : ', model.n_estimators)

Number of Trees used :  100


In [56]:
# Predict with the fitted forest on both the training rows and the held-out test rows.
predict_train = model.predict(X_train)
predict_test = model.predict(X_test)

# Accuracy on each split; a large gap between the two suggests overfitting.
accuracy_train = accuracy_score(y_train, predict_train)
accuracy_test = accuracy_score(y_test, predict_test)

print('\naccuracy_score on train dataset : ', accuracy_train)
print('\naccuracy_score on test dataset : ', accuracy_test)



accuracy_score on train dataset :  0.9966666666666667

accuracy_score on test dataset :  0.9


Predicted Results

In [57]:
# Wrap the test-set predictions in a one-column DataFrame (reused by later cells)
# and count how many rows were predicted for each class.
predicted_labels = predict_test.tolist()
pred = pd.DataFrame(predicted_labels)
pred.value_counts()

0    57
1    43
dtype: int64

Actual Results

In [58]:
# Count how many test rows actually belong to each class, for comparison with the predictions.
actual_class_counts = y_test.value_counts()
actual_class_counts

0    65
1    35
Name: Purchased, dtype: int64

## Analysing the results

In [59]:
# Rows are the actual classes, columns are the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=pred)
print(cm)

[[56  9]
 [ 1 34]]


In [60]:
# Per-class precision, recall and F1 for the two classes (0 and 1).
report = classification_report(y_true=y_test, y_pred=pred, labels=[0, 1])
print(report)

              precision    recall  f1-score   support

           0       0.98      0.86      0.92        65
           1       0.79      0.97      0.87        35

    accuracy                           0.90       100
   macro avg       0.89      0.92      0.89       100
weighted avg       0.92      0.90      0.90       100



In [61]:
# Overall share of test rows the random forest classified correctly.
rf_accuracy = accuracy_score(y_test, pred)
print('Accuracy:\t', rf_accuracy)

Accuracy:	 0.9


# AdaBoost

## Training and Testing the data

In [62]:
# From this cell on, `model` refers to the AdaBoost classifier rather than the random forest.
model = AdaBoostClassifier(random_state=101)
model.fit(X_train, y_train)
# fit() returns the estimator itself, so `lables` is just another name for the
# fitted model; it does not hold predicted labels.
lables = model

## Analysing the results

In [63]:
# Report the AdaBoost test accuracy together with the hyper-parameters it ran with.
for label, value in (
    ('Accuracy:\t', model.score(X_test, y_test)),
    ('Estimators:\t', model.n_estimators),
    ('Learning Rate:\t', model.learning_rate),
):
    print(label, value)

Accuracy:	 0.94
Estimators:	 50
Learning Rate:	 1.0
