SelectFromModel is a meta-transformer that can be used alongside any estimator that assigns an importance to each feature through a specific attribute (such as `coef_` or `feature_importances_`). A feature is considered unimportant and removed if its importance value is below the provided threshold parameter.

In [1]:
import numpy as np
import pandas as pd

from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [4]:
# Load at most the first 2000 data rows of the CSV, decoding it as Latin-1,
# then preview the first five rows.
df = pd.read_csv(
    'ANSUR II MALE Public.csv',
    encoding='latin-1',
    nrows=2000,
)
df.head()

Unnamed: 0,subjectid,abdominalextensiondepthsitting,acromialheight,acromionradialelength,anklecircumference,axillaheight,balloffootcircumference,balloffootlength,biacromialbreadth,bicepscircumferenceflexed,...,Branch,PrimaryMOS,SubjectsBirthLocation,SubjectNumericRace,Ethnicity,DODRace,Age,Heightin,Weightlbs,WritingPreference
0,10027,266,1467,337,222,1347,253,202,401,369,...,Combat Arms,19D,North Dakota,1,,1,41,71,180,Right hand
1,10032,233,1395,326,220,1293,245,193,394,338,...,Combat Support,68W,New York,1,,1,35,68,160,Left hand
2,10033,287,1430,341,230,1327,256,196,427,408,...,Combat Support,68W,New York,2,,2,42,68,205,Left hand
3,10092,234,1347,310,230,1239,262,199,401,359,...,Combat Service Support,88M,Wisconsin,1,,1,31,66,175,Right hand
4,10093,250,1585,372,247,1478,267,224,435,356,...,Combat Service Support,92G,North Carolina,2,,2,21,77,213,Right hand


In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(2000, 108)

In [6]:
# Keep only the numeric columns; every non-numeric column is left out of `data`.
data = df.select_dtypes(include=[np.number])

In [7]:
# Dimensions of the numeric-only frame as (rows, columns).
data.shape

(2000, 99)

In [8]:
# Target: the "Weightlbs" column. Predictors: every other numeric column.
# NOTE(review): "Unnamed: 0" and "subjectid" in the preview look numeric, so they
# presumably remain in X as features -- confirm that identifiers are wanted here.
y = data["Weightlbs"]
X = data.drop(columns=["Weightlbs"])

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=24,
)
# Report the shapes of both partitions.
print(f"Train Data: {X_train.shape},{y_train.shape}")
print(f"Test Data: {X_test.shape},{y_test.shape}")

Train Data: (1600, 98),(1600,)
Test Data: (400, 98),(400,)


In [10]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
ss = StandardScaler()
scaled_train = ss.fit(X_train).transform(X_train)
scaled_test = ss.transform(X_test)

In [11]:
# Baseline: a seeded random forest trained on every scaled feature.
rf_model = RandomForestRegressor(random_state=24)
rf_model.fit(X=scaled_train, y=y_train)

In [12]:
# Evaluate the baseline forest on the held-out split
# (a regressor's score() is R^2 per the scikit-learn docs).
baseline_score = rf_model.score(scaled_test, y_test)
print(f"Score:{baseline_score} ")

Score:0.9497379636263636 


In [14]:
# Rank features with a fresh seeded random forest; with threshold='mean', any
# feature whose importance is below the mean importance is discarded.
select = SelectFromModel(
    estimator=RandomForestRegressor(random_state=24),
    threshold='mean',
)
X_train_selected = select.fit(scaled_train, y_train).transform(scaled_train)

# Compare shapes before and after selection. The "X_train" label in the first
# message actually reports the scaled training matrix.
print("X_train.shape:{}".format(scaled_train.shape))
print("X_train_selected.shape: {}".format(X_train_selected.shape))

X_train.shape:(1600, 98)
X_train_selected.shape: (1600, 2)


In [16]:
# Apply the already-fitted selector to the scaled test split so it keeps the
# same columns as the reduced training matrix, then show the resulting shape.
X_test_selected = select.transform(X=scaled_test)
X_test_selected.shape

(400, 2)

In [17]:
# Retrain a seeded random forest on the selected features only.
# Note: this rebinds `rf_model`, so the forest fitted earlier on all features
# is no longer reachable through that name.
rf_model = RandomForestRegressor(random_state=24)
rf_model.fit(X=X_train_selected, y=y_train)

In [18]:
# Evaluate the reduced-feature forest on the matching reduced test split.
selected_score = rf_model.score(X_test_selected, y_test)
print(f"Score:{selected_score} ")

Score:0.9410603346215063 
