# L1 Regularization (Lasso) Feature Selection algorithm 

In [11]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import pandas as pd
import warnings

# L1 (Lasso) feature selection pipeline:
#   1. load the dataset and split it into features / target,
#   2. label-encode the target and one-hot encode categorical features,
#   3. fit a Lasso model and keep the features with non-zero coefficients,
#   4. write the reduced dataset (selected features + original labels) to CSV.

# Load the dataset into a pandas DataFrame
dataset_path = r'C:\Users\tahsi\OneDrive\Desktop\python_ws\dataset_NoPCA.csv'
df = pd.read_csv(dataset_path)

# Separate the features and target variable
# NOTE(review): the recorded output of this cell lists 'Unnamed: 0' and
# 'ImageNumber' among the selected features. These look like a saved row index
# and an image identifier rather than real predictors -- confirm whether they
# should be dropped from X before fitting.
X = df.drop('classification', axis=1)
y = df['classification']

# Encode the target variable into numeric values
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Encode categorical columns using one-hot encoding.
# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in
# 1.4, so prefer the new keyword and fall back only for older installations.
categorical_columns = X.select_dtypes(include='object').columns
try:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

if categorical_columns.empty:
    # Nothing to encode: the encoder cannot be fitted on zero columns, so use
    # an empty frame that still lines up row-for-row with X.
    X_encoded = pd.DataFrame(index=X.index)
else:
    # Reuse X's index so the axis=1 concat below aligns row-for-row instead of
    # relying on X happening to have a default RangeIndex.
    X_encoded = pd.DataFrame(
        encoder.fit_transform(X[categorical_columns]),
        columns=encoder.get_feature_names_out(categorical_columns),
        index=X.index,
    )

# Combine the encoded categorical columns with the numerical columns
X = pd.concat([X.drop(categorical_columns, axis=1), X_encoded], axis=1)

# Initialize the Lasso model
# NOTE(review): the L1 penalty is sensitive to feature scale and the features
# are not standardized here, so large-magnitude columns are favoured -- consider
# scaling X before fitting.
lasso = Lasso(alpha=0.1)  # Adjust the alpha parameter as needed

# Fit the Lasso model
lasso.fit(X, y)

# Get the coefficients and a boolean mask of the features Lasso kept
coefficients = lasso.coef_
nonzero_feature_indices = coefficients != 0

# Get the names of the selected features
selected_feature_names = X.columns[nonzero_feature_indices]

# Create a DataFrame with the selected features and target variable
# (labels are mapped back to their original string values)
selected_data = X[selected_feature_names].copy()
selected_data['classification'] = label_encoder.inverse_transform(y)

# Save the selected data as a CSV file (relative to the working directory)
selected_data.to_csv('selected_dataset.csv', index=False)

# Print the selected feature names
print("Selected features:", selected_feature_names.tolist())

# Also save a copy alongside the other feature-selection results
selected_data.to_csv(r'C:\Users\tahsi\OneDrive\Desktop\python_ws\Feature Selection\L1 Regularization\selected_dataset.csv', index=False)


Selected features: ['Unnamed: 0', 'Blue.count', 'red.count', 'total.clusters', 'area', 'Count_Nuclei', 'ImageNumber', 'Texture_Contrast_3_135', 'Texture_Entropy_3_135', 'Texture_SumVariance_3_90']
