In [4]:
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import pandas as pd

# Load data
data = load_iris()
X = data.data

# K-Means
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(X)

# Add cluster labels
df = pd.DataFrame(X, columns=data.feature_names)
df['cluster'] = clusters
print(df.head())

# Find similar sample
sample_index = 0
sample_point = X[sample_index].reshape(1, -1)
closest_indices, distances = pairwise_distances_argmin_min(sample_point, X)


print("\nClosest match:", df.iloc[closest_indices[0]])


   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   cluster  
0        1  
1        1  
2        1  
3        1  
4        1  

Closest match: sepal length (cm)    5.1
sepal width (cm)     3.5
petal length (cm)    1.4
petal width (cm)     0.2
cluster              1.0
Name: 0, dtype: float64


In [7]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier

data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)
rf_acc = accuracy_score(y_test, rf.predict(X_test))

bag = BaggingClassifier(n_estimators=200, random_state=42)
bag.fit(X_train, y_train)
bag_acc = accuracy_score(y_test, bag.predict(X_test))

ada = AdaBoostClassifier(n_estimators=200, learning_rate=0.5, random_state=42)
ada.fit(X_train, y_train)
ada_acc = accuracy_score(y_test, ada.predict(X_test))

gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)
gb_acc = accuracy_score(y_test, gb.predict(X_test))

print("This are the results of bagging and boosting")
print("Model Accuracy:")
print("Random Forest:", round(rf_acc, 4))
print("Bagging:", round(bag_acc, 4))
print("AdaBoost:", round(ada_acc, 4))
print("Gradient Boosting:", round(gb_acc, 4))


This are the results of bagging and boosting
Model Accuracy:
Random Forest: 0.9649
Bagging: 0.9561
AdaBoost: 0.9737
Gradient Boosting: 0.9561
