# Optimizarea performantelor modelului - Tema 1

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings("ignore")


In [3]:
# Read winequality-red.csv from the working directory; fields in this file
# are separated by semicolons rather than commas.
df = pd.read_csv(
    "winequality-red.csv",
    sep=";",
)

def grupare_calitate(x):
    """Map a numeric quality score to one of three coarse labels.

    Args:
        x: The score to classify; it is only compared with ``<=``.

    Returns:
        "slab" when ``x <= 4``, "mediu" when ``x <= 6`` (and above 4),
        otherwise "bun".
    """
    # Inclusive upper bounds, checked in ascending order; the first bound
    # that the score does not exceed decides the label.
    for upper_bound, label in ((4, "slab"), (6, "mediu")):
        if x <= upper_bound:
            return label
    # Anything that matched no bound falls through to the top bucket.
    return "bun"

# Derive the three-level label column from the numeric "quality" column by
# applying grupare_calitate to every value.
df["calitate"] = df["quality"].map(grupare_calitate)

# Target: the derived label. Features: every column except the raw score
# and the label that was derived from it.
y = df["calitate"]
X = df.drop(["quality", "calitate"], axis="columns")


In [4]:
# Split BEFORE scaling. Fitting the scaler on the full feature matrix would
# let the held-out rows influence the mean/variance used to transform the
# training rows (data leakage), which biases the reported test metrics.
# The split itself depends only on y, the sample count and random_state, so
# the same rows land in train/test as when splitting the scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling parameters from the training rows only, then apply that
# same fitted transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell still referring to X_scaled continues to work: the
# full feature matrix transformed with the train-only statistics.
X_scaled = scaler.transform(X)


In [5]:
# RidgeClassifier wrapped in a pipeline with its own StandardScaler step, so
# the scaler is refit inside every cross-validation fold.
model_ridge = Pipeline(
    steps=[("scaler", StandardScaler()), ("ridge", RidgeClassifier())]
)

# The "ridge__" prefix routes the parameter to the pipeline step named "ridge".
parametri_ridge = dict(ridge__alpha=[0.01, 0.1, 1, 10, 100])

# 5-fold grid search over alpha, selecting by accuracy; fit() returns the
# fitted search object itself.
grid_ridge = GridSearchCV(
    estimator=model_ridge,
    param_grid=parametri_ridge,
    scoring="accuracy",
    cv=5,
).fit(X_train, y_train)

# Predict on the held-out rows with the best pipeline found by the search.
y_pred_ridge = grid_ridge.best_estimator_.predict(X_test)

# Report: chosen alpha, test accuracy (4 decimals) and per-class metrics.
# zero_division=0 reports 0 for metrics whose denominator is zero.
print("=== RidgeClassifier ===")
print(
    "Cea mai buna valoare alpha:",
    grid_ridge.best_params_["ridge__alpha"],
)
print(
    "Acuratete pe setul de test:",
    round(accuracy_score(y_test, y_pred_ridge), 4),
)
print(
    classification_report(y_test, y_pred_ridge, zero_division=0)
)


=== RidgeClassifier ===
Cea mai buna valoare alpha: 10
Acuratete pe setul de test: 0.8313
              precision    recall  f1-score   support

         bun       0.60      0.14      0.23        43
       mediu       0.84      0.98      0.91       264
        slab       0.00      0.00      0.00        13

    accuracy                           0.83       320
   macro avg       0.48      0.37      0.38       320
weighted avg       0.77      0.83      0.78       320



In [6]:
# Hyper-parameter grid for the random forest: 2 x 3 x 2 = 12 combinations.
parametri_rf = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
)

# 5-fold grid search selecting by accuracy; n_jobs=-1 requests all available
# cores. The forest is seeded so repeated runs build the same trees.
grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=parametri_rf,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Predict on the held-out rows with the best forest found by the search.
y_pred_rf = grid_rf.best_estimator_.predict(X_test)

# Report: chosen parameters, test accuracy (4 decimals) and per-class metrics.
# zero_division=0 reports 0 for metrics whose denominator is zero.
print("=== RandomForestClassifier ===")
print(
    "Cei mai buni parametri:",
    grid_rf.best_params_,
)
print(
    "Acuratete pe setul de test:",
    round(accuracy_score(y_test, y_pred_rf), 4),
)
print(
    classification_report(y_test, y_pred_rf, zero_division=0)
)


=== RandomForestClassifier ===
Cei mai buni parametri: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 200}
Acuratete pe setul de test: 0.8688
              precision    recall  f1-score   support

         bun       0.73      0.56      0.63        43
       mediu       0.89      0.96      0.92       264
        slab       0.00      0.00      0.00        13

    accuracy                           0.87       320
   macro avg       0.54      0.51      0.52       320
weighted avg       0.83      0.87      0.85       320



In [7]:
# Hyper-parameter grid for gradient boosting: 2 x 2 x 2 = 8 combinations.
parametri_gb = dict(
    n_estimators=[100, 200],
    learning_rate=[0.05, 0.1],
    max_depth=[3, 5],
)

# 5-fold grid search selecting by accuracy; n_jobs=-1 requests all available
# cores. The booster is seeded so repeated runs are reproducible.
grid_gb = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=parametri_gb,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Predict on the held-out rows with the best booster found by the search.
y_pred_gb = grid_gb.best_estimator_.predict(X_test)

# Report: chosen parameters, test accuracy (4 decimals) and per-class metrics.
# zero_division=0 reports 0 for metrics whose denominator is zero.
print("=== GradientBoostingClassifier ===")
print(
    "Cei mai buni parametri:",
    grid_gb.best_params_,
)
print(
    "Acuratete pe setul de test:",
    round(accuracy_score(y_test, y_pred_gb), 4),
)
print(
    classification_report(y_test, y_pred_gb, zero_division=0)
)


=== GradientBoostingClassifier ===
Cei mai buni parametri: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 200}
Acuratete pe setul de test: 0.8562
              precision    recall  f1-score   support

         bun       0.66      0.63      0.64        43
       mediu       0.89      0.94      0.91       264
        slab       0.00      0.00      0.00        13

    accuracy                           0.86       320
   macro avg       0.52      0.52      0.52       320
weighted avg       0.83      0.86      0.84       320

