In [1]:
import pandas as pd 
import seaborn as sns 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [2]:
df = sns.load_dataset("titanic")

df = df[["survived", "pclass", "sex", "age", "fare", "embarked"]]
df = df.dropna()

In [3]:
df = pd.get_dummies(df, columns=["sex", "embarked"], drop_first=True)

x = df.drop("survived", axis=1)
y = df["survived"]

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [5]:
scaler = StandardScaler()

num_cols = ["age", "fare"]

x_train[num_cols] = scaler.fit_transform(x_train[num_cols]) 
x_test[num_cols] = scaler.transform(x_test[num_cols])

# Baseline Model tanlab shu bilan train qilamiz 

In [6]:
model = LogisticRegression()

model.fit(x_train, y_train)

y_pred = model.predict(x_test)

In [7]:
print("Accuracy natijasi:", accuracy_score(y_test, y_pred))
print("\n Classificdation report: \n")
print(classification_report(y_test, y_pred))

Accuracy natijasi: 0.7972027972027972

 Classificdation report: 

              precision    recall  f1-score   support

           0       0.76      0.93      0.84        80
           1       0.87      0.63      0.73        63

    accuracy                           0.80       143
   macro avg       0.82      0.78      0.79       143
weighted avg       0.81      0.80      0.79       143



# Bu Baseline Model hisoblanadi yani bunda quyidagilar yo`q 

- Hech qanday hyperparametr yo`q 
- hech qanday murakkab model yo`q 

# Hyperparametr Tuning 

# Hyperparameter - bu algaritmning settingslari hisoblanadi, agarda biz hyperparameter bermasak  u holda algaritm o`zi default parametrlar bilan ishlashni davom qildiradi

# Decision tree misolida aytsak masalan: max_depth = 1 bersak u xolda model underfittinga moyil bo`lib qoladi agrda shu qiymatni 100 bersak u xolda model overfittinga moyil bo`liob qoladi 

# manashu muammoni oldini olsih uchun biz eng optimal xolatni aniqlashimiz kerak


# parametr va hyperparametr nima? 

# Parametr bu -  
- internal coeffitsient 
- datalardan avtomatik o`rganadfi va manual kerak emas 
- barcha ichki settingslar bu parametr xisoblanadi

# Hyperparametr
- configurations (o`zgaruvchilar)
- trainingdan oldin qoyiladi 
- training processniu nazorat qiladi
- ular datalardan o`rganmaydi 

# Hyperparameter tuning nima  - model uchun optimal bo`lgan (eng yaxshi natija beruvchi) hyperparametrlarni izlash

# Nima uchun hyperparatmetr tuning  muhim 
- underffitting va overfittingni oldini olish 
- modelning accuracy va rubostness nes baholarini yaxshilash 
- bias variance tradeoff ni kamaytirish 
- vaqtni va resusrni tejash 

# Hyperparametr tuning dan so`ng quyidagilar yaxshilanadi 
- aniqlik oshadi 
- erorrlar kamayadi 
- generalization oshadi 

# Hyperparameter Turlari 
- Manual Search 
- Grid Search 
- Random Search 
- Bayesian Optimization 
- Optuna 

# MANUAL SEARCH 

In [11]:
from sklearn.tree import DecisionTreeClassifier
best_score = 0 
best_depth = None 


for depth in range(1, 21):
    model = DecisionTreeClassifier(
        max_depth=depth,
        random_state=42
    )


    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    score = accuracy_score(y_test, preds)


    if score > best_score:
        best_score = score
        best_depth = depth 

print("best_depth:", best_depth)
print("best accuracy:", best_score)

best_depth: 1
best accuracy: 0.7482517482517482


# FOYDALILIGI: 
-   kichik va sodda modellarda foydasi kam 
# ISHLATILISHI: 
- tez tajribalar, kam pareametrli oddiy modellarda asosan ishlatiladi
# Misol : max_depth = 3 yoki max_depth = 5 qiymatini Decision tree unchun sinab ko`rish 

# Afzalligi: oddiy, kutubxonaqlar kerak emas

# kamchiligi : samarasiz, eng yaxshi parametrni o`tkazib yuborishi mumkin

# GRID SEARCH 

In [12]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    "max_depth": [3, 5, 7, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}

model = DecisionTreeClassifier(random_state=42)

grid = GridSearchCV(
    model,
    param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=1
)

grid.fit(x_train, y_train)

print("Best params :", grid.best_params_)
print("Best score:", grid.best_score_)

Best params : {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}
Best score: 0.7978885266263003


# Foydaliligi: kichik va kam paramtrlar uchun o`rtacha natija beradi,  
# Ishlatilishi: kichik parametrlar soni bilan  toliq tekshrish
# Misol: Random Forest uchun n_estimators = [50, 100] va max_depth = [3, 5]
# Afzalligi: barcha kombinatsiyalar tekshirtiladi. 

# Random Search 

In [14]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint


param_dist = {
    "max_depth": randint(1, 20),
    "min_samples_split": randint(2, 20),
    "min_samples_leaf": randint(1, 10),
}


random_search = RandomizedSearchCV(
    model,
    param_dist,
    n_iter=20,
    cv=5,
    scoring="accuracy",
    n_jobs=1,
    random_state=42
)


random_search.fit(x_train, y_train)

print("Best params :", random_search.best_params_)
print("Best score:", random_search.best_score_)

Best params : {'max_depth': 6, 'min_samples_leaf': 6, 'min_samples_split': 11}
Best score: 0.7978730010867877


# Foydaliligi: Katta parametr maydoni uchun yuqori natija 
# Ishlatilishi:  katta modellarda asosan foy-i (XGBoost, Bagging, BNoosting, stacking, voting, DL da )
# Misol: n_estimator, max_depth, min_samples va shu kabi parametrlarni tasodifiy kambinatsiyalarini tekshirish
# Afzalligi nisbatan tezroq, asosan yaxshi natijalarni topib beradi

# Bayesian optimization

# Foydaliligi: o`rta va katta alagartimlarda yuqori natija beradi 
# Ishlatilishi: har bir model juda muhim bo`lghan holatlarda, ensemble Random forest, Neyron tarmoqlar 
# Misol: XGBoost modelida learning_rate va n_estimator larni optimlalshtirish 
# Afzalligi : samarali, ehtimollik modellari yordamida optimal qiymatni topadi
# Kamchiligi: kompleks, kutubxonalar zarur  

In [18]:
!pip install scikit-optimize


Collecting scikit-optimize
  Using cached scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-26.2.1-py3-none-any.whl.metadata (12 kB)
Collecting PyYAML (from pyaml>=16.9->scikit-optimize)
  Using cached pyyaml-6.0.3-cp313-cp313-win_amd64.whl.metadata (2.4 kB)
Using cached scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
Downloading pyaml-26.2.1-py3-none-any.whl (27 kB)
Using cached pyyaml-6.0.3-cp313-cp313-win_amd64.whl (154 kB)
Installing collected packages: PyYAML, pyaml, scikit-optimize

   ---------------------------------------- 0/3 [PyYAML]
   ------------- -------------------------- 1/3 [pyaml]
   ------------- -------------------------- 1/3 [pyaml]
   -------------------------- ------------- 2/3 [scikit-optimize]
   -------------------------- ------------- 2/3 [scikit-optimize]
   -------------------------- ------------- 2/3 [scikit-optimize]
   ---------------------------------------- 3/3 [scikit-


[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
from skopt import BayesSearchCV


param_space = {
    "max_depth": (1, 20),
    "min_samples_split": (2, 20),
    "min_samples_leaf": (1, 10),
}


bayes = BayesSearchCV(
    model,
    param_space,
    n_iter=20,
    cv=5,
    scoring="accuracy",
    n_jobs=1,
    random_state=42
)


bayes.fit(x_train, y_train)

print("Best params :", bayes.best_params_)
print("Best score:", bayes.best_score_)

Best params : OrderedDict({'max_depth': 16, 'min_samples_leaf': 5, 'min_samples_split': 11})
Best score: 0.7996739636702375


# OPTUNA


# Foydaliligi:  katta va murakkab modellar uchun yuqori natija beradi 
# Ishlatilishi: kompleks pipeline, deep learningda 
# Misol:  RF da n_estimator , max_depth, min_samples, split, min_samples_leaf bilan birga tuning qilish
# Afzalligi : kuchli , multi objective optimization, distributed tuning qilish imkoniyati 
# kutubxona zarur, API ni o`rganishni talab qiladi

In [21]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.7.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.18.4-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Using cached colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.46-cp313-cp313-win_amd64.whl.metadata (9.8 kB)
Collecting tqdm (from optuna)
  Downloading tqdm-4.67.3-py3-none-any.whl.metadata (57 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Using cached mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.12 (from alembic>=1.5.0->optuna)
  Using cached typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.3.1-cp313-cp313-win_amd64.whl.metadata (3.8 kB)
Collecting MarkupSafe>=0.9.2 (from Mako->alembic>=1.5.0->optuna)
  Using cached markupsafe-3.0.3-cp313-cp313-win_amd64.whl.metadata 


[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
import optuna 

def objective(trial):

    max_depth = trial.suggest_int("max_depth", 1, 20)
    min_saples_split = trial.suggest_int("min_saples_split", 2, 20)
    min_saples_leaf = trial.suggest_int("min_saples_leaf", 1, 10)


    model = DecisionTreeClassifier(
        max_depth=max_depth,
        min_samples_split=min_saples_split,
        min_samples_leaf=min_saples_leaf,
        random_state=42
    )

    model.fit(x_train, y_train)
    preds = model.predict(x_test)

    return accuracy_score(y_test, preds)

study = optuna.create_study(direction = "maximize")
study.optimize(objective, n_trials = 30)

print("Best params :", study.best_params)
print("Best score:", study.best_value)

  from .autonotebook import tqdm as notebook_tqdm
[32m[I 2026-02-12 10:12:01,876][0m A new study created in memory with name: no-name-1df8af5a-a77e-4cb4-b4cc-f2c8219de301[0m
[32m[I 2026-02-12 10:12:01,883][0m Trial 0 finished with value: 0.7622377622377622 and parameters: {'max_depth': 19, 'min_saples_split': 14, 'min_saples_leaf': 5}. Best is trial 0 with value: 0.7622377622377622.[0m
[32m[I 2026-02-12 10:12:01,890][0m Trial 1 finished with value: 0.7062937062937062 and parameters: {'max_depth': 15, 'min_saples_split': 3, 'min_saples_leaf': 1}. Best is trial 0 with value: 0.7622377622377622.[0m
[32m[I 2026-02-12 10:12:01,899][0m Trial 2 finished with value: 0.7482517482517482 and parameters: {'max_depth': 20, 'min_saples_split': 6, 'min_saples_leaf': 3}. Best is trial 0 with value: 0.7622377622377622.[0m
[32m[I 2026-02-12 10:12:01,909][0m Trial 3 finished with value: 0.7482517482517482 and parameters: {'max_depth': 1, 'min_saples_split': 13, 'min_saples_leaf': 5}. Best i

Best params : {'max_depth': 9, 'min_saples_split': 4, 'min_saples_leaf': 8}
Best score: 0.7832167832167832
