<a href="https://colab.research.google.com/github/TheKrazyKiwi29/Sreejit-Skill/blob/main/skill78.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.feature_selection import VarianceThreshold, SelectKBest, chi2, f_classif, mutual_info_classif
from sklearn.preprocessing import StandardScaler

# Load the wine dataset
wine = load_wine()
X = pd.DataFrame(wine.data, columns=wine.feature_names)
y = wine.target

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Initialize VarianceThreshold with a threshold value
selector = VarianceThreshold(threshold=0.2)
X_variance = selector.fit_transform(X_scaled)

# Get the selected feature names
selected_features_variance = X.columns[selector.get_support()]
print("Selected features using Variance Threshold:", selected_features_variance.tolist())

# Initialize SelectKBest with chi2
chi2_selector = SelectKBest(chi2, k=5)
X_chi2 = chi2_selector.fit_transform(X, y)

# Get the selected feature names
selected_features_chi2 = X.columns[chi2_selector.get_support()]
print("Selected features using Chi-Square Test:", selected_features_chi2.tolist())

# Initialize SelectKBest with ANOVA F-Value
anova_selector = SelectKBest(f_classif, k=5)
X_anova = anova_selector.fit_transform(X, y)

# Get the selected feature names
selected_features_anova = X.columns[anova_selector.get_support()]
print("Selected features using ANOVA F-Value:", selected_features_anova.tolist())

# Initialize SelectKBest with Information Gain
info_gain_selector = SelectKBest(mutual_info_classif, k=5)
X_info_gain = info_gain_selector.fit_transform(X, y)

# Get the selected feature names
selected_features_info_gain = X.columns[info_gain_selector.get_support()]
print("Selected features using Information Gain:", selected_features_info_gain.tolist())

import numpy as np

# Calculate the correlation matrix
corr_matrix = pd.DataFrame(X_scaled, columns=X.columns).corr().abs()

# Select upper triangle of correlation matrix
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

# Find features with correlation greater than 0.8
to_drop = [column for column in upper.columns if any(upper[column] > 0.8)]

# Drop features
X_corr = pd.DataFrame(X_scaled, columns=X.columns).drop(columns=to_drop)

print("Selected features using Pearson's Correlation Coefficient:", X_corr.columns.tolist())

# Convert numpy arrays back to DataFrames for saving
X_variance_df = pd.DataFrame(X_variance, columns=selected_features_variance)
X_chi2_df = pd.DataFrame(X_chi2, columns=selected_features_chi2)
X_anova_df = pd.DataFrame(X_anova, columns=selected_features_anova)
X_info_gain_df = pd.DataFrame(X_info_gain, columns=selected_features_info_gain)
X_corr_df = pd.DataFrame(X_corr, columns=X_corr.columns)

# Save to CSV
X_variance_df.to_csv('wine_variance_filtered.csv', index=False)
X_chi2_df.to_csv('wine_chi2_filtered.csv', index=False)
X_anova_df.to_csv('wine_anova_filtered.csv', index=False)
X_info_gain_df.to_csv('wine_info_gain_filtered.csv', index=False)
X_corr_df.to_csv('wine_corr_filtered.csv', index=False)



Selected features using Variance Threshold: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Selected features using Chi-Square Test: ['alcalinity_of_ash', 'magnesium', 'flavanoids', 'color_intensity', 'proline']
Selected features using ANOVA F-Value: ['alcohol', 'flavanoids', 'color_intensity', 'od280/od315_of_diluted_wines', 'proline']
Selected features using Information Gain: ['flavanoids', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Selected features using Pearson's Correlation Coefficient: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
