In [1]:
import pandas as pd 
from sklearn.svm import SVR
from sklearn.feature_selection import RFE
from sklearn.preprocessing import StandardScaler

# Read the expression table: the first CSV row supplies the column names and
# the first CSV column becomes the row index.
gene_expression = pd.read_csv(
    "TCGA_LUAD_miRNA_expression_disease_status.csv",
    header=0,
    index_col=0,
)

# Positional split of the table into predictors and target.
# NOTE(review): assumes the final column holds the disease status label and
# every other column is a numeric feature — confirm against the CSV.
X = gene_expression.iloc[:, :-1]  # every column but the last
y = gene_expression.iloc[:, -1]   # the last column only

# Learn per-column scaling statistics from X, then apply them to X.
# The result is no longer a DataFrame, so column names are taken from X later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [3]:
# Seed value kept from the original cell. Nothing in this cell consumes it:
# it is not passed to SVR or RFE below.
random_state = 42

# Number of features RFE should retain. This single value drives the selector,
# the printed header and the output file name, so the three cannot drift apart.
N_TOP_FEATURES = 100

# Linear-kernel support vector regressor used as the RFE base estimator.
# NOTE(review): the target is described as disease status; if it is a class
# label, a linear SVC may be the intended estimator — confirm before changing,
# since that would alter which features are selected.
svm = SVR(kernel="linear")

# Recursive feature elimination down to N_TOP_FEATURES features.
selector = RFE(estimator=svm, n_features_to_select=N_TOP_FEATURES)
selector.fit(X_scaled, y)

# Per-feature rank assigned by RFE; retained features have rank 1.
ranking = selector.ranking_

# support_ is the boolean mask of retained features (the rank-1 entries).
# Names come from X because X_scaled carries no column labels.
top_features = X.columns[selector.support_]

# Show the selected feature names.
print(f"Top {N_TOP_FEATURES} Features:")
print(top_features)

# Persist the selected names as a single-column CSV without the row index.
top_features_df = pd.DataFrame(top_features, columns=["Features"])
top_features_df.to_csv(
    f"LUAD_miRNA_svm_rfe_selected_features_top{N_TOP_FEATURES}.csv",
    index=False,
)


Top 100 Features:
Index(['hsa-let-7a-2', 'hsa-let-7d', 'hsa-let-7i', 'hsa-mir-1226',
       'hsa-mir-124-2', 'hsa-mir-126', 'hsa-mir-1275', 'hsa-mir-1288',
       'hsa-mir-1294', 'hsa-mir-1303', 'hsa-mir-1306', 'hsa-mir-143',
       'hsa-mir-144', 'hsa-mir-146a', 'hsa-mir-184', 'hsa-mir-185',
       'hsa-mir-186', 'hsa-mir-197', 'hsa-mir-215', 'hsa-mir-30d',
       'hsa-mir-3125', 'hsa-mir-3150a', 'hsa-mir-3154', 'hsa-mir-3156-1',
       'hsa-mir-3192', 'hsa-mir-320c-2', 'hsa-mir-3614', 'hsa-mir-3619',
       'hsa-mir-3621', 'hsa-mir-3622b', 'hsa-mir-3655', 'hsa-mir-3658',
       'hsa-mir-3668', 'hsa-mir-3677', 'hsa-mir-3689a', 'hsa-mir-374b',
       'hsa-mir-376c', 'hsa-mir-378a', 'hsa-mir-378f', 'hsa-mir-378i',
       'hsa-mir-3916', 'hsa-mir-3928', 'hsa-mir-425', 'hsa-mir-4424',
       'hsa-mir-4426', 'hsa-mir-4433a', 'hsa-mir-4436b-2', 'hsa-mir-4443',
       'hsa-mir-4507', 'hsa-mir-4513', 'hsa-mir-4529', 'hsa-mir-4647',
       'hsa-mir-4649', 'hsa-mir-4659a', 'hsa-mir-4669', 'hsa-