In [50]:
from sklearn.datasets import fetch_california_housing
from sklearn.feature_selection import VarianceThreshold
import pandas as pd

# Retrieve the California housing data; the returned object exposes the
# feature matrix as `.data` and the column labels as `.feature_names`.
california_housing = fetch_california_housing()

# Wrap the feature matrix in a labelled DataFrame so that columns can be
# addressed by name in the cells that follow.
california_df = pd.DataFrame(
    data=california_housing.data,
    columns=california_housing.feature_names,
)

# Preview the first five rows.
print(california_df.head(n=5))

# Report the (rows, columns) dimensions of the unfiltered dataset.
print(f'Original Data Shape: {california_df.shape}')




   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  
0    -122.23  
1    -122.22  
2    -122.24  
3    -122.25  
4    -122.25  
Original Data Shape: (20640, 8)


In [55]:
# Variance cut-off for feature selection: low-variance features are dropped.
# NOTE(review): the variances are computed on the raw, unscaled columns, so
# this cut-off depends on each feature's units -- confirm that is intended.
threshold = 0.5
vt = VarianceThreshold(threshold=threshold)

# Learn the per-feature variances and filter the columns in a single step;
# `vt` remains fitted, so its support mask can be queried below.
transformed = vt.fit_transform(california_df)

# Map the boolean support mask back onto the column labels to recover the
# names of the features that are kept.
features_kept = california_df.columns[vt.get_support()]
print(f'Features kept: {features_kept.tolist()}')

# Rebuild a labelled frame for display. Passing the source index keeps the row
# labels aligned with `california_df`; without it the rows would be silently
# renumbered 0..n-1 whenever the source frame carries a non-default index.
transformed_df = pd.DataFrame(
    transformed, columns=features_kept, index=california_df.index
)
print(f'Transformed Data: \n {transformed_df}')


Features kept: ['MedInc', 'HouseAge', 'AveRooms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
Transformed Data: 
        MedInc  HouseAge  AveRooms  Population  AveOccup  Latitude  Longitude
0      8.3252      41.0  6.984127       322.0  2.555556     37.88    -122.23
1      8.3014      21.0  6.238137      2401.0  2.109842     37.86    -122.22
2      7.2574      52.0  8.288136       496.0  2.802260     37.85    -122.24
3      5.6431      52.0  5.817352       558.0  2.547945     37.85    -122.25
4      3.8462      52.0  6.281853       565.0  2.181467     37.85    -122.25
...       ...       ...       ...         ...       ...       ...        ...
20635  1.5603      25.0  5.045455       845.0  2.560606     39.48    -121.09
20636  2.5568      18.0  6.114035       356.0  3.122807     39.49    -121.21
20637  1.7000      17.0  5.205543      1007.0  2.325635     39.43    -121.22
20638  1.8672      18.0  5.329513       741.0  2.123209     39.43    -121.32
20639  2.3886      16.0  5.25471