In [12]:
import pandas as pd
import numpy as np
import optuna

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [13]:
# 1- Veriyi oku
data = pd.read_table(r"C:\Users\borac\Desktop\LUAD_miRNA\input\TCGA_LUAD_miRNA_expression_disease_status.txt", header=0, index_col=0)

# 2- Transpoz al
data = data.transpose()

# 3- Özellikleri ve hedef değişkeni ayır
X = data.iloc[:, :-1]  # Son sütun hariç tüm sütunlar
y = data.iloc[:, -1]   # Son sütun

# 4- Y'yi sayısal değere çevir
y = pd.to_numeric(y, errors='coerce')  # Hatalı dönüşüm varsa NaN olur

# 5- Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# 6- Standard Scale (Ölçekleme)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [17]:
# OPTUNA # 
def objective(trial):
    alpha = trial.suggest_float('alpha', 1e-4, 10.0, log=True)
    l1_ratio = trial.suggest_float('l1_ratio', 0.0, 1.0)

    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42,max_iter = 10000)

    # 5-fold cross-validation ile R² hesapla
    scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='r2')
    return scores.mean()  # maximize etmek istiyoruz


# --- Optuna Study ---
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=350, show_progress_bar=True)

# --- Sonuçlar ---
print("En iyi parametreler:")
print(study.best_params)
print(f"Ortalama CV R²: {study.best_value:.4f}")


[I 2025-10-14 10:48:50,127] A new study created in memory with name: no-name-18a9b635-ff8e-45ee-b4f2-43ca6a16da14


  0%|          | 0/350 [00:00<?, ?it/s]

[I 2025-10-14 10:48:50,182] Trial 0 finished with value: 0.1957634228881461 and parameters: {'alpha': 0.4333749134353606, 'l1_ratio': 0.17460177300963808}. Best is trial 0 with value: 0.1957634228881461.
[I 2025-10-14 10:48:53,846] Trial 1 finished with value: 0.17820349025847024 and parameters: {'alpha': 0.00024049922685618897, 'l1_ratio': 0.538937655132774}. Best is trial 0 with value: 0.1957634228881461.
[I 2025-10-14 10:48:53,905] Trial 2 finished with value: 0.37285805586742987 and parameters: {'alpha': 0.20103352124389542, 'l1_ratio': 0.18803348640203443}. Best is trial 2 with value: 0.37285805586742987.
[I 2025-10-14 10:48:54,035] Trial 3 finished with value: 0.5191349944356183 and parameters: {'alpha': 0.015440782047946279, 'l1_ratio': 0.35282594181856763}. Best is trial 3 with value: 0.5191349944356183.
[I 2025-10-14 10:48:54,078] Trial 4 finished with value: 0.10293296602099719 and parameters: {'alpha': 0.3050884039976327, 'l1_ratio': 0.35675831090503873}. Best is trial 3 wit

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[I 2025-10-14 10:49:42,154] Trial 56 finished with value: 0.4285097107219397 and parameters: {'alpha': 0.056187403315375126, 'l1_ratio': 0.003953329875714062}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:49:43,743] Trial 57 finished with value: 0.23345490045178305 and parameters: {'alpha': 0.001549397793077259, 'l1_ratio': 0.22990791861304125}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:49:43,792] Trial 58 finished with value: -0.0036102799227801354 and parameters: {'alpha': 2.858142919396749, 'l1_ratio': 0.4364362043686159}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:49:43,834] Trial 59 finished with value: 0.03955458452478618 and parameters: {'alpha': 0.3625560182476541, 'l1_ratio': 0.35616577517684467}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:49:44,044] Trial 60 finished with value: 0.5221815394447096 and parameters: {'alpha': 0.09674091843079112, 'l1_ratio': 0.04825902400049331}. Best is t

  model = cd_fast.enet_coordinate_descent(


[I 2025-10-14 10:50:08,009] Trial 98 finished with value: 0.18228727526062835 and parameters: {'alpha': 0.0001336582440782665, 'l1_ratio': 0.19101767886743493}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:50:08,530] Trial 99 finished with value: 0.49037333668104166 and parameters: {'alpha': 0.0609703304508128, 'l1_ratio': 0.03986045755526656}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:50:08,819] Trial 100 finished with value: 0.46841289102947703 and parameters: {'alpha': 0.010426741402548532, 'l1_ratio': 0.25224421513206885}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:50:09,049] Trial 101 finished with value: 0.5161026504236601 and parameters: {'alpha': 0.023553518011349455, 'l1_ratio': 0.15852601464272523}. Best is trial 34 with value: 0.5245308323300277.
[I 2025-10-14 10:50:09,228] Trial 102 finished with value: 0.5250052636729622 and parameters: {'alpha': 0.036432201636576435, 'l1_ratio': 0.13389954030742357}. Best

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[I 2025-10-14 10:50:32,502] Trial 114 finished with value: 0.40466458143516404 and parameters: {'alpha': 0.017087292342107128, 'l1_ratio': 0.000417788283371745}. Best is trial 102 with value: 0.5250052636729622.
[I 2025-10-14 10:50:32,595] Trial 115 finished with value: 0.4905745164316534 and parameters: {'alpha': 0.054784299351458746, 'l1_ratio': 0.16966998771329303}. Best is trial 102 with value: 0.5250052636729622.
[I 2025-10-14 10:50:32,736] Trial 116 finished with value: 0.520934245887798 and parameters: {'alpha': 0.027241149748997214, 'l1_ratio': 0.20931743800250532}. Best is trial 102 with value: 0.5250052636729622.
[I 2025-10-14 10:50:32,980] Trial 117 finished with value: 0.5162440890507889 and parameters: {'alpha': 0.03970110105202303, 'l1_ratio': 0.095001397151184}. Best is trial 102 with value: 0.5250052636729622.
[I 2025-10-14 10:50:33,050] Trial 118 finished with value: 0.4495793380787666 and parameters: {'alpha': 0.12451770332889123, 'l1_ratio': 0.11932269923558982}. Bes

  model = cd_fast.enet_coordinate_descent(


[I 2025-10-14 10:51:29,119] Trial 338 finished with value: 0.1772441353217295 and parameters: {'alpha': 0.00024080714805627124, 'l1_ratio': 0.1455256219297721}. Best is trial 253 with value: 0.5251094370551598.
[I 2025-10-14 10:51:29,217] Trial 339 finished with value: 0.4872318867379392 and parameters: {'alpha': 0.05579948354236699, 'l1_ratio': 0.17220877354442213}. Best is trial 253 with value: 0.5251094370551598.
[I 2025-10-14 10:51:29,404] Trial 340 finished with value: 0.5245300783702389 and parameters: {'alpha': 0.04321110543580615, 'l1_ratio': 0.11257545191502963}. Best is trial 253 with value: 0.5251094370551598.
[I 2025-10-14 10:51:29,642] Trial 341 finished with value: 0.508642701163684 and parameters: {'alpha': 0.02523875936133182, 'l1_ratio': 0.1347816811925359}. Best is trial 253 with value: 0.5251094370551598.
[I 2025-10-14 10:51:29,859] Trial 342 finished with value: 0.5131080266424626 and parameters: {'alpha': 0.01764793454949894, 'l1_ratio': 0.20830598341985584}. Best 

In [18]:
# --- En iyi modeli yeniden eğit ---
best_alpha = study.best_params['alpha']
best_l1 = study.best_params['l1_ratio']

# Create an Elastic Net model
best_model = ElasticNet(alpha=best_alpha, l1_ratio=best_l1, random_state=42, max_iter = 10000)

# Train the model
best_model.fit(X_train_scaled, y_train)

In [19]:
# --- Test Performansı ---
y_pred = best_model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"\nTest R²: {r2:.4f}")
print(f"Test MSE: {mse:.4f}")


Test R²: 0.6970
Test MSE: 0.0314


In [11]:
# --- Seçilen Özellikler ---
coefficients = best_model.coef_
selected_features = X.columns[coefficients != 0]
selected_df = pd.DataFrame({
    'Feature': selected_features,
    'Coefficient': coefficients[coefficients != 0]
}).sort_values(by='Coefficient', ascending=False)

In [13]:
selected_df.to_csv("C:/Users/borac/Desktop/LUAD_miRNA/output/feature_selection/elastic_net_regularization/LUAD_mirna_ElasticNetReg_SelectedFeatures.txt", sep='\t', index=False)

print(f"\nToplam seçilen özellik sayısı: {len(selected_df)}")
print(selected_df.head())


Toplam seçilen özellik sayısı: 164
          Feature  Coefficient
5     hsa-mir-126     0.020932
60   hsa-mir-4443     0.020654
126  hsa-mir-664b     0.015713
59   hsa-mir-4441     0.014200
46   hsa-mir-3677     0.012976
