# Relief Feature Selection

![Relief.jpg](attachment:df760723-fde2-406f-b7ae-0af361e95cf4.jpg)

# Import Library

Dependencies : notebook, numpy, pandas, scikit-learn

In [1]:
import sys
sys.path.append('../')  # Add the parent directory to the Python path

import warnings
warnings.filterwarnings("ignore") # Ignore all warnings

import numpy as np
import pandas as pd

from Methods.ReliefF import ReliefF

# Load Data

In [2]:
test = pd.read_csv("../data/test.csv")
train = pd.read_csv("../data/train.csv")

In [3]:
train

Unnamed: 0,id,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,0,2.3859,15.0,3.827160,1.112100,1280.0,2.486989,34.60,-120.12,0.980
1,1,3.7188,17.0,6.013373,1.054217,1504.0,3.813084,38.69,-121.22,0.946
2,2,4.7750,27.0,6.535604,1.103175,1061.0,2.464602,34.71,-120.45,1.576
3,3,2.4138,16.0,3.350203,0.965432,1255.0,2.089286,32.66,-117.09,1.336
4,4,3.7500,52.0,4.284404,1.069246,1793.0,1.604790,37.80,-122.41,4.500
...,...,...,...,...,...,...,...,...,...,...
37132,37132,3.3438,50.0,4.936508,1.079365,1775.0,3.022222,34.19,-118.36,1.856
37133,37133,3.7308,26.0,5.087533,0.966019,1006.0,4.316901,37.32,-121.86,1.588
37134,37134,4.1716,52.0,4.678862,1.101485,1156.0,1.431734,37.75,-122.44,3.387
37135,37135,2.7143,16.0,5.710074,1.068376,584.0,2.803659,38.40,-120.98,1.592


# Feature Selection

In [4]:
X = train.drop(columns=["MedHouseVal"], axis=1)
y = train["MedHouseVal"]

# Convert the DataFrame to NumPy arrays
X = X.to_numpy()
y = y.to_numpy()

# Print the shapes
print("Feature matrix shape:", X.shape)
print("Target array shape:", y.shape)

Feature matrix shape: (37137, 9)
Target array shape: (37137,)


In [5]:
# Create an instance of the ReliefF class
n_neighbors = 3  # Number of neighbors to consider
n_features_to_keep = 5  # Number of features to keep
reliefF = ReliefF(n_neighbors=n_neighbors, n_features_to_keep=n_features_to_keep)

In [6]:
# Fit the model on your training data
reliefF.fit(X, y)

# Transform the feature matrix to keep only the top features
X_reduced = reliefF.transform(X)

# Print the reduced feature matrix
print("Original shape:", X.shape)
print("Reduced shape:", X_reduced.shape)
print("Top features:", reliefF.top_features[:n_features_to_keep])

Original shape: (37137, 9)
Reduced shape: (37137,)
Top features: [0 3 6 1 4]


In [7]:
train.iloc[:,:n_features_to_keep].columns

Index(['id', 'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms'], dtype='object')