#### Dataset Creation

In [1]:
# Build the make_moons dataset used by every model in this notebook.
#   n_samples    -> 10,000 data points are generated.
#   noise        -> controls how much randomness/variability is added to the points.
#   random_state -> fixed at 0 so the exact same dataset is produced on every run.

from sklearn.datasets import make_moons

x, y = make_moons(
    n_samples=10_000,
    noise=0.5,
    random_state=0,
)

In [28]:
# Feature matrix returned by make_moons: one row per data point, two features (columns) each
x

array([[ 2.14947704, -0.41259447],
       [ 1.19279353,  0.42481646],
       [-0.25546951,  1.5204891 ],
       ...,
       [-0.91861448,  0.59341167],
       [ 1.90261348,  0.00255057],
       [-0.15171694,  0.84876693]])

In [29]:
# Target labels returned by make_moons - binary classification: each entry is 0 or 1
y

array([1, 0, 0, ..., 0, 1, 0], dtype=int64)

In [30]:
# Hold out 20% of the samples as the test set; random_state=42 keeps the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [31]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.metrics import accuracy_score

#### Decision tree model

In [32]:
# Fit a decision tree model and print its accuracy on the test split.
# random_state is fixed because scikit-learn randomly permutes the features at
# each split, so an unseeded tree can report a different accuracy on each run.
dclf = DecisionTreeClassifier(random_state=42)
dclf.fit(X_train, y_train)
dclf_pred = dclf.predict(X_test)
print(accuracy_score(y_test, dclf_pred))

0.754


#### Random forest model

In [33]:
# Fit a random forest model and print its accuracy on the test split.
# By default n_estimators = 100, i.e. the forest contains 100 trees.
# random_state is fixed so the randomness used while building the trees - and
# therefore the reported accuracy - is the same on every run.

rclf = RandomForestClassifier(random_state=42)
rclf.fit(X_train, y_train)
rclf_pred = rclf.predict(X_test)
print(accuracy_score(y_test, rclf_pred))

0.795


##### Compared to the Decision Tree model, accuracy goes up by about 4 percentage points (0.754 → 0.795)

#### Baggingclassifier

In [35]:
# Fit a bagging classifier and print its accuracy on the test split.
# n_estimators - the number of base estimators in the ensemble.
# random_state is fixed so the random resampling of the training data is
# repeatable and the reported accuracy does not change between runs.
bclf = BaggingClassifier(n_estimators=100, random_state=42)
bclf.fit(X_train, y_train)
bclf_pred = bclf.predict(X_test)
print(accuracy_score(y_test, bclf_pred))

0.79


##### Almost the same accuracy as Random Forest

#### Adaboost

In [36]:
# Fit an AdaBoost model with 100 estimators and print its accuracy on the test split.
# random_state is fixed so the seed handed to the base estimators is the same
# on every run, keeping the reported accuracy reproducible.
aclf = AdaBoostClassifier(n_estimators=100, random_state=42)
aclf.fit(X_train, y_train)
aclf_pred = aclf.predict(X_test)
print(accuracy_score(y_test, aclf_pred))



0.833


##### Compared to the Decision Tree model, accuracy goes up by about 8 percentage points (0.754 → 0.833)

#### Gradientboost

In [37]:
# Fit a GradientBoosting model with 100 estimators and print its accuracy on the test split.
# random_state is fixed so the seed given to each tree (and the feature
# permutation at each split) is identical on every run.
gclf = GradientBoostingClassifier(n_estimators=100, random_state=42)
gclf.fit(X_train, y_train)
gclf_pred = gclf.predict(X_test)
print(accuracy_score(y_test, gclf_pred))

0.8335


##### Almost the same accuracy as AdaBoost

#### XGBoost

In [40]:
pip install xgboost





[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [43]:
# Train an XGBoost classifier with its default settings and print its
# accuracy on the test split.
import xgboost as xgb

xclf = xgb.XGBClassifier()
xclf.fit(X_train, y_train)
xclf_pred = xclf.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=xclf_pred))

0.8135


#### Evaluation

In [44]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [48]:
# Precision of every fitted model on the test set, one line per model.
# Labels are kept exactly as originally printed.
print("Precision")
for label, preds in (
    ("Decision tree : ", dclf_pred),
    ("Random forst : ", rclf_pred),
    ("Bagging Classifier : ", bclf_pred),
    ("Adaboost : ", aclf_pred),
    ("GradientBoost : ", gclf_pred),
    ("XGBoost : ", xclf_pred),
):
    print(label, precision_score(y_test, preds))

Precision
Decision tree :  0.749
Random forst :  0.7911646586345381
Bagging Classifier :  0.786144578313253
Adaboost :  0.8129770992366412
GradientBoost :  0.8149568552253116
XGBoost :  0.8021547502448579


In [49]:
# Recall of every fitted model on the test set, one line per model.
# Labels are kept exactly as originally printed.
print("Recall")
for label, preds in (
    ("Decision tree : ", dclf_pred),
    ("Random forst : ", rclf_pred),
    ("Bagging Classifier : ", bclf_pred),
    ("Adaboost : ", aclf_pred),
    ("GradientBoost : ", gclf_pred),
    ("XGBoost : ", xclf_pred),
):
    print(label, recall_score(y_test, preds))

Recall
Decision tree :  0.7565656565656566
Random forst :  0.795959595959596
Bagging Classifier :  0.7909090909090909
Adaboost :  0.8606060606060606
GradientBoost :  0.8585858585858586
XGBoost :  0.8272727272727273


In [52]:
# F1 score of every fitted model on the test set, one line per model.
# Labels are kept exactly as originally printed.
print("F1 score")
for label, preds in (
    ("Decision tree : ", dclf_pred),
    ("Random forst : ", rclf_pred),
    ("Bagging Classifier : ", bclf_pred),
    ("Adaboost : ", aclf_pred),
    ("GradientBoost : ", gclf_pred),
    ("XGBoost : ", xclf_pred),
):
    print(label, f1_score(y_test, preds))

F1 score
Decision tree :  0.7527638190954774
Random forst :  0.7935548841893253
Bagging Classifier :  0.7885196374622356
Adaboost :  0.8361138370951914
GradientBoost :  0.8362026561731432
XGBoost :  0.8145201392342118


In [56]:
# Confusion matrix of every fitted model on the test set: a heading line
# with the model label followed by the matrix itself.
print("confusion matrix")
for label, preds in (
    ("Decision tree : ", dclf_pred),
    ("Random forst : ", rclf_pred),
    ("Bagging Classifier : ", bclf_pred),
    ("Adaboost : ", aclf_pred),
    ("GradientBoost : ", gclf_pred),
    ("XGBoost : ", xclf_pred),
):
    print(label)
    print(confusion_matrix(y_test, preds))

confusion matrix
Decision tree : 
[[759 251]
 [241 749]]
Random forst : 
[[802 208]
 [202 788]]
Bagging Classifier : 
[[797 213]
 [207 783]]
Adaboost : 
[[814 196]
 [138 852]]
GradientBoost : 
[[817 193]
 [140 850]]
XGBoost : 
[[808 202]
 [171 819]]


##### Instead we can use classification_report

In [57]:
# Full classification report (precision, recall, f1-score, support) for every
# fitted model on the test set: a heading line with the model label followed
# by the report text.
print("classification_report")
for label, preds in (
    ("Decision tree : ", dclf_pred),
    ("Random forst : ", rclf_pred),
    ("Bagging Classifier : ", bclf_pred),
    ("Adaboost : ", aclf_pred),
    ("GradientBoost : ", gclf_pred),
    ("XGBoost : ", xclf_pred),
):
    print(label)
    print(classification_report(y_test, preds))

classification_report
Decision tree : 
              precision    recall  f1-score   support

           0       0.76      0.75      0.76      1010
           1       0.75      0.76      0.75       990

    accuracy                           0.75      2000
   macro avg       0.75      0.75      0.75      2000
weighted avg       0.75      0.75      0.75      2000

Random forst : 
              precision    recall  f1-score   support

           0       0.80      0.79      0.80      1010
           1       0.79      0.80      0.79       990

    accuracy                           0.80      2000
   macro avg       0.79      0.80      0.79      2000
weighted avg       0.80      0.80      0.80      2000

Bagging Classifier : 
              precision    recall  f1-score   support

           0       0.79      0.79      0.79      1010
           1       0.79      0.79      0.79       990

    accuracy                           0.79      2000
   macro avg       0.79      0.79      0.79      20

Based on the provided classification report:

AdaBoost and Gradient Boost achieve the highest accuracy of 83%.
AdaBoost and Gradient Boost exhibit high precision, recall, and F1-score for both classes, with balanced macro and weighted averages.
XGBoost also shows competitive performance, with an accuracy of 81% and balanced precision, recall, and F1-score.
Random Forest and Bagging Classifier have slightly lower accuracy (80% and 79%, respectively), but still demonstrate balanced performance across classes.

It's advisable to conduct further analysis, such as hyperparameter tuning, to ensure the suitability of the chosen model for the given classification task.