In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.svm import SVC 
from sklearn.metrics import accuracy_score 
from sklearn.feature_selection import SequentialFeatureSelector , SelectKBest,f_classif
# --- Load data ---------------------------------------------------------------
# Location of the Iris CSV, kept in one named constant so it can be changed in
# a single place when the notebook is run on another machine.
IRIS_CSV_PATH = "C:/Users/Hp/Desktop/VSCode/datalumina-project-template/data/IRIS.csv"

try:
    data = pd.read_csv(IRIS_CSV_PATH)
except Exception as e:
    # Print a short hint, then re-raise so the cell still fails loudly.
    print("Error reading the file:", e)
    raise

# Display the first few rows to ensure correct formatting
print("Dataset preview:\n", data.head())

# Verify column names and check for correct feature splitting
print("Column names:", data.columns)

# --- Separate features and target ---------------------------------------------
# Adjust target column if necessary
target_column = "species"
if target_column not in data.columns:
    raise KeyError(f"'{target_column}' column not found. Check the CSV for correct formatting.")

X = data.drop(columns=target_column)
y = data[target_column]

# Force every feature column to a numeric dtype; values that cannot be parsed
# become NaN so they can be detected and removed below.
X = X.apply(pd.to_numeric, errors="coerce")

# Drop every row that has a NaN in any feature. The same boolean mask is
# applied to X and y so the two always stay aligned row-for-row.
valid_rows = X.notna().all(axis=1)
if not valid_rows.all():
    print("Warning: Non-numeric data found and removed.")
    X = X.loc[valid_rows]
    y = y.loc[valid_rows]

# --- Train / test split -------------------------------------------------------
# 80/20 split with a fixed seed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Filter Method: SelectKBest
# Score the training features with `f_classif` and keep k=2 of them; the
# selector is fitted on the training split only and then applied to both splits.
selector = SelectKBest(score_func=f_classif, k=2)
selector.fit(X_train, y_train)
X_train_filtered = selector.transform(X_train)
X_test_filtered = selector.transform(X_test)

# Fit an SVM with default settings on the reduced training features
clf = SVC().fit(X_train_filtered, y_train)

# Predict on the held-out split and report the accuracy
y_pred = clf.predict(X_test_filtered)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy with Filter Method:", accuracy)
# Wrapper Method: Sequential Feature Selector
# The selector wraps its own SVC instance and is fitted on the training split.
sfs = SequentialFeatureSelector(estimator=SVC(), n_features_to_select=2)
sfs.fit(X_train, y_train)

# Column labels of the features the selector kept
selected_features = X_train.columns[sfs.get_support()]

# Restrict both splits to those columns
X_train_selected = X_train.loc[:, selected_features]
X_test_selected = X_test.loc[:, selected_features]

# Fit a fresh SVM with default settings on the selected columns
clf = SVC().fit(X_train_selected, y_train)

# Predict on the held-out split and report the accuracy
y_pred = clf.predict(X_test_selected)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy with Wrapper Method:", accuracy)

Dataset preview:
    sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa
Column names: Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')
Accuracy with Filter Method: 1.0
Accuracy with Wrapper Method: 1.0
