In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the semicolon-delimited dataset, then preview its first rows.
df = pd.read_csv("beauty.csv", sep=";")
df.head()


Unnamed: 0,wage,exper,union,goodhlth,black,female,married,service,educ,looks
0,5.73,30,0,1,0,1,1,1,14,4
1,4.28,28,0,1,0,1,1,0,12,3
2,7.96,35,0,1,0,1,0,0,10,4
3,11.57,38,0,1,0,0,1,1,16,3
4,11.42,27,0,1,0,0,1,0,16,3


In [10]:
# Feature matrix: every column of the frame except `looks`.
X = df.drop(columns='looks')

# Fit the scaler on the features first, then apply it to get the
# standardized array used by the cells that follow.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [11]:
# Show the standardized feature array as the cell's output.
X_scaled

array([[-0.12378548,  0.98619536, -0.61159284, ...,  0.66829117,
         1.62854963,  0.54756497],
       [-0.43502515,  0.81895361, -0.61159284, ...,  0.66829117,
        -0.6140433 , -0.21479068],
       [ 0.35487966,  1.40429971, -0.61159284, ..., -1.49635375,
        -0.6140433 , -0.97714632],
       ...,
       [-0.64752672,  1.40429971, -0.61159284, ..., -1.49635375,
         1.62854963, -0.21479068],
       [-0.8578818 , -0.26811771, -0.61159284, ...,  0.66829117,
         1.62854963, -0.97714632],
       [-0.94159454,  0.48447013, -0.61159284, ..., -1.49635375,
         1.62854963,  1.30992061]])

In [14]:
from sklearn.cluster import KMeans

# Fit a seeded 3-cluster KMeans on the standardized features and read
# each row's cluster index from the fitted model.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X_scaled)
cluster_labels = kmeans.labels_

# The cluster assignments double as the target vector for the split below.
y_clusters = cluster_labels


In [19]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Hold out 20% of the rows for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y_clusters,
    test_size=0.2,
    random_state=0,
)

# Candidate random-forest settings explored by the grid search.
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[None, 5, 10],
)

# Cross-validated search (cv=5) over the grid; fit() hands back the
# fitted search object, which is then used to predict the held-out rows.
grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=0),
    param_grid=param_grid,
    cv=5,
).fit(X_train, y_train)
y_pred = grid.predict(X_test)

print("Best Params:", grid.best_params_)
print(classification_report(y_test, y_pred))


Best Params: {'max_depth': None, 'n_estimators': 50}
              precision    recall  f1-score   support

           0       0.96      0.99      0.97        92
           1       0.98      0.87      0.92        52
           2       0.96      0.99      0.98       108

    accuracy                           0.96       252
   macro avg       0.97      0.95      0.96       252
weighted avg       0.96      0.96      0.96       252



In [18]:
from sklearn.svm import SVC


# Candidate SVC settings: regularization strength, kernel and gamma mode.
param_grid_svc = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf'],
    gamma=['scale', 'auto'],
)

# Cross-validated search (cv=5) on the training split, followed by
# predictions for the held-out rows.
svc_grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid_svc,
    cv=5,
).fit(X_train, y_train)
y_pred_svc = svc_grid.predict(X_test)

print(" SVM Best Parameters:", svc_grid.best_params_)
print(
    "SVM Classification Report:\n",
    classification_report(y_test, y_pred_svc),
)


 SVM Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'linear'}
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.98      0.97        92
           1       0.96      0.98      0.97        52
           2       1.00      0.97      0.99       108

    accuracy                           0.98       252
   macro avg       0.97      0.98      0.98       252
weighted avg       0.98      0.98      0.98       252



In [17]:
from sklearn.neighbors import KNeighborsClassifier


# Candidate KNN settings: neighbourhood size, vote weighting and metric.
param_grid_knn = dict(
    n_neighbors=[3, 5, 7, 9],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Cross-validated search (cv=5) on the training split, followed by
# predictions for the held-out rows.
knn_grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid_knn,
    cv=5,
).fit(X_train, y_train)
y_pred_knn = knn_grid.predict(X_test)

print(" KNN Best Parameters:", knn_grid.best_params_)
print(
    " KNN Classification Report:\n",
    classification_report(y_test, y_pred_knn),
)


 KNN Best Parameters: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}
 KNN Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.97      0.95        92
           1       0.88      0.88      0.88        52
           2       0.96      0.93      0.94       108

    accuracy                           0.93       252
   macro avg       0.92      0.93      0.92       252
weighted avg       0.93      0.93      0.93       252

