In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import SelectKBest, chi2, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv('/content/Maternal Health Risk Data Set.csv')

# One seed for the split and for every estimator below. Without it the forests
# and trees are re-randomised on each run, so accuracy differences between
# selection methods cannot be told apart from seed noise.
RANDOM_STATE = 42

# Number of features each selection method keeps.
N_FEATURES = 5


def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` on the training data and score it on the evaluation data.

    Args:
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        X_fit: Training feature matrix.
        y_fit: Training labels.
        X_eval: Evaluation feature matrix.
        y_eval: Evaluation labels.

    Returns:
        Tuple ``(predictions, accuracy)`` where ``predictions`` is the output
        of ``model.predict(X_eval)`` and ``accuracy`` is the result of
        ``accuracy_score(y_eval, predictions)``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    return predictions, accuracy_score(y_eval, predictions)


# Encode target variable 'RiskLevel' as integer class labels
le = LabelEncoder()
df['RiskLevel'] = le.fit_transform(df['RiskLevel'])

# Split dataset into features and target
X = df.drop('RiskLevel', axis=1)
y = df['RiskLevel']

# Split the data into training and testing sets (70% / 30%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Every selector is fitted on the training split only and then applied to the
# test split, so the test data never influences which features are kept.

# Feature selection using ANOVA (F-test)
anova_selector = SelectKBest(f_classif, k=N_FEATURES)
X_train_anova = anova_selector.fit_transform(X_train, y_train)
X_test_anova = anova_selector.transform(X_test)

# Feature selection using Chi-Square
# NOTE(review): chi2 expects non-negative feature values (per the scikit-learn
# docs); this assumes every column in the CSV satisfies that -- confirm.
chi2_selector = SelectKBest(chi2, k=N_FEATURES)
X_train_chi2 = chi2_selector.fit_transform(X_train, y_train)
X_test_chi2 = chi2_selector.transform(X_test)

# Feature selection using Recursive Feature Elimination (RFE) with RandomForestClassifier
rfe_selector = RFE(
    estimator=RandomForestClassifier(random_state=RANDOM_STATE),
    n_features_to_select=N_FEATURES,
)
X_train_rfe = rfe_selector.fit_transform(X_train, y_train)
X_test_rfe = rfe_selector.transform(X_test)

# Train and evaluate models using Random Forest and Decision Tree with the selected features

# 1. Random Forest with ANOVA-selected features
rf_anova = RandomForestClassifier(random_state=RANDOM_STATE)
y_pred_rf_anova, accuracy_rf_anova = _fit_and_score(
    rf_anova, X_train_anova, y_train, X_test_anova, y_test
)

# 2. Decision Tree with ANOVA-selected features
dt_anova = DecisionTreeClassifier(random_state=RANDOM_STATE)
y_pred_dt_anova, accuracy_dt_anova = _fit_and_score(
    dt_anova, X_train_anova, y_train, X_test_anova, y_test
)

# 3. Random Forest with Chi-Square-selected features
rf_chi2 = RandomForestClassifier(random_state=RANDOM_STATE)
y_pred_rf_chi2, accuracy_rf_chi2 = _fit_and_score(
    rf_chi2, X_train_chi2, y_train, X_test_chi2, y_test
)

# 4. Decision Tree with Chi-Square-selected features
dt_chi2 = DecisionTreeClassifier(random_state=RANDOM_STATE)
y_pred_dt_chi2, accuracy_dt_chi2 = _fit_and_score(
    dt_chi2, X_train_chi2, y_train, X_test_chi2, y_test
)

# 5. Random Forest with RFE-selected features
rf_rfe = RandomForestClassifier(random_state=RANDOM_STATE)
y_pred_rf_rfe, accuracy_rf_rfe = _fit_and_score(
    rf_rfe, X_train_rfe, y_train, X_test_rfe, y_test
)

# 6. Decision Tree with RFE-selected features
dt_rfe = DecisionTreeClassifier(random_state=RANDOM_STATE)
y_pred_dt_rfe, accuracy_dt_rfe = _fit_and_score(
    dt_rfe, X_train_rfe, y_train, X_test_rfe, y_test
)

# Store results in a dataframe for comparison
results = pd.DataFrame({
    'Model': ['Random Forest (ANOVA)', 'Decision Tree (ANOVA)',
              'Random Forest (Chi-Square)', 'Decision Tree (Chi-Square)',
              'Random Forest (RFE)', 'Decision Tree (RFE)'],
    'Accuracy': [accuracy_rf_anova, accuracy_dt_anova,
                 accuracy_rf_chi2, accuracy_dt_chi2,
                 accuracy_rf_rfe, accuracy_dt_rfe],
})

# Display the results
print(results)

# Get the selected features from each feature selection method and display them

# Get feature names
feature_names = X.columns

# get_support() returns a boolean mask over the input columns; indexing the
# column Index with it yields the names of the features each method kept.

# ANOVA selected features
anova_selected_features = feature_names[anova_selector.get_support()]
anova_selected_features_list = list(anova_selected_features)

# Chi-Square selected features
chi2_selected_features = feature_names[chi2_selector.get_support()]
chi2_selected_features_list = list(chi2_selected_features)

# RFE selected features
rfe_selected_features = feature_names[rfe_selector.get_support()]
rfe_selected_features_list = list(rfe_selected_features)

# Create a dataframe to display the selected features for each method.
# All three lists have N_FEATURES entries, so the columns line up.
selected_features_df = pd.DataFrame({
    'ANOVA': anova_selected_features_list,
    'Chi-Square': chi2_selected_features_list,
    'RFE': rfe_selected_features_list,
})

# Display the dataframe
print(selected_features_df)



                        Model  Accuracy
0       Random Forest (ANOVA)  0.809836
1       Decision Tree (ANOVA)  0.800000
2  Random Forest (Chi-Square)  0.796721
3  Decision Tree (Chi-Square)  0.790164
4         Random Forest (RFE)  0.816393
5         Decision Tree (RFE)  0.793443
         ANOVA   Chi-Square          RFE
0          Age          Age          Age
1   SystolicBP   SystolicBP   SystolicBP
2  DiastolicBP  DiastolicBP  DiastolicBP
3           BS           BS           BS
4    HeartRate    HeartRate    HeartRate
