In [10]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [4]:
# Load the raw table; the file uses ';' as its field separator.
df = pd.read_csv("beauty.csv", sep=";")

In [5]:
# Feature matrix: every column except "looks".
# A later cell writes the KMeans labels back into `df` as "Cluster_KMeans";
# drop that column here too so that re-running this cell after clustering does
# not leak the cluster label into the features. errors="ignore" keeps the first
# run (when the column does not exist yet) working exactly as before.
X = df.drop(columns=["looks", "Cluster_KMeans"], errors="ignore")

In [9]:
# Standardise the features: learn per-column statistics from X, then apply
# them to X itself. The fitted `scaler` is kept so the same transform can be
# reused later.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [6]:
# Partition the standardised rows into three clusters and store the label of
# each row on the original frame.
# n_init is pinned explicitly: the library default changed from 10 to 'auto'
# in scikit-learn 1.4 (and omitting it warns in 1.2-1.3), so leaving it unset
# makes the resulting clusters depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['Cluster_KMeans'] = kmeans.fit_predict(X_scaled)

In [7]:
# Candidate classifiers and the hyper-parameter grid searched for each one.
# Each entry maps a display name to:
#   "model"  - an unfitted estimator instance
#   "params" - the grid of values tried for that estimator
models_params = {
    "LogisticRegression": {
        # max_iter raised above the library default of 100.
        "model": LogisticRegression(max_iter=1000),
        "params": {
            "C": [0.1, 1, 10],
            "solver": ['liblinear', 'lbfgs']
        }
    },
    "RandomForest": {
        # Seeded so the selected model and its report are reproducible
        # between runs; an unseeded forest gives different results each time.
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [5, 10, None]  # None = no depth limit
        }
    },
    "SVC": {
        "model": SVC(),
        "params": {
            "C": [0.1, 1, 10],
            "kernel": ['linear', 'rbf']
        }
    }
}

In [None]:
# Hold out a test set for the classifiers below. No other cell in this
# notebook defines X_train / X_test / y_train / y_test, so on a fresh kernel
# this cell previously failed with NameError.
# NOTE(review): the target and the split size are inferred, not shown anywhere
# in the notebook. The saved report has three classes (0/1/2), which matches
# the three KMeans clusters, and a 378-row test set, which would be a 30%
# hold-out if the file has 1260 rows. Confirm both against the original run.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, df['Cluster_KMeans'], test_size=0.3, random_state=42
)

# For every candidate: 5-fold cross-validated grid search (scored by accuracy)
# on the training split, then a per-class report on the held-out split.
for name, mp in models_params.items():
    print(f"GridSearch for {name}")
    grid = GridSearchCV(mp["model"], mp["params"], cv=5, scoring='accuracy')
    grid.fit(X_train, y_train)
    # predict() delegates to the best estimator, which GridSearchCV refits on
    # the whole training split by default (refit=True).
    y_pred = grid.predict(X_test)
    print(f"Best Estimator for {name}: {grid.best_estimator_}")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

Running GridSearch for LogisticRegression
Best Estimator for LogisticRegression: LogisticRegression(C=10, max_iter=1000)
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       134
           1       1.00      0.99      0.99        85
           2       1.00      0.99      0.99       159

    accuracy                           0.99       378
   macro avg       0.99      0.99      0.99       378
weighted avg       0.99      0.99      0.99       378

--------------------------------------------------
Running GridSearch for RandomForest
Best Estimator for RandomForest: RandomForestClassifier()
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       134
           1       1.00      0.93      0.96        85
           2       0.95      0.99      0.97       159

    accuracy                           0.97       378
   macro avg       0.98      0.96      0.97       378
weighted avg       0.97    