## Filler Methods

## 1. Univariate Feature Selection (Anova F-Test)

In [1]:
from sklearn.datasets import load_wine
import pandas as pd
import numpy as np

In [5]:
data = load_wine()
X = pd.DataFrame(data.data,columns = data.feature_names)
y = pd.Series(data.target,name='target')

In [6]:
X.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [7]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

In [8]:
print("Features:", X.columns.tolist())
print("Class distribution:", np.bincount(y))

Features: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Class distribution: [59 71 48]


In [11]:
## Anova F-test
from sklearn.feature_selection import SelectKBest,f_classif

#select top 5 features using Anova F-test
selector = SelectKBest(score_func=f_classif,k=5)
X_new = selector.fit_transform(X,y)

#Get selected features
selected_features = X.columns[selector.get_support()]
print(f"Top 5 Features Anova:{selected_features.tolist()}")

#View F-scores and p-value
feature_score = pd.DataFrame({
    'Feature': X.columns,
    'F-Score': selector.scores_,
    'p-value': selector.pvalues_
}).sort_values(by='F-Score',ascending=False)



# Print in your desired format
for index, row in feature_score.head(5).iterrows():
    feature = row['Feature']
    f_stat = round(row['F-Score'], 3)
    p_val = round(row['p-value'], 3) if row['p-value'] >= 0.001 else 0.000
    print(f"{feature}: F-stat={f_stat}, p-value={p_val}")

Top 5 Features Anova:['alcohol', 'flavanoids', 'color_intensity', 'od280/od315_of_diluted_wines', 'proline']
flavanoids: F-stat=233.926, p-value=0.0
proline: F-stat=207.92, p-value=0.0
od280/od315_of_diluted_wines: F-stat=189.972, p-value=0.0
alcohol: F-stat=135.078, p-value=0.0
color_intensity: F-stat=120.664, p-value=0.0
