In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def split_scalar(indep_X, dep_Y):
    """Split the data into train/test folds and standardise the features.

    A quarter of the rows is held out (``test_size=0.25``) with a fixed
    ``random_state=0`` so repeated calls on the same data give the same split.
    The scaler is fitted on the training fold only and then applied to both
    folds, so no statistics from the test fold are used for scaling.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the two feature blocks
        are the scaled outputs of ``StandardScaler`` and the two label blocks
        are returned exactly as produced by ``train_test_split``.
    """
    features_train, features_test, target_train, target_test = train_test_split(
        indep_X,
        dep_Y,
        test_size=0.25,
        random_state=0,
    )

    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(features_train)
    scaled_test = scaler.transform(features_test)

    return scaled_train, scaled_test, target_train, target_test

def accuracy_prediction(classifier, X_test, y_test):
    """Return the accuracy of an already-fitted classifier on a test fold.

    Parameters
    ----------
    classifier : object exposing ``predict``.
    X_test : features to predict on.
    y_test : true labels corresponding to ``X_test``.

    Returns
    -------
    The result of ``accuracy_score(y_test, predictions)``.
    """
    predictions = classifier.predict(X_test)
    return accuracy_score(y_test, predictions)

def Logistic(X_train, y_train, X_test, y_test=None):
    """Fit a logistic-regression classifier and return its test accuracy.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test : features the fitted model is scored on.
    y_test : optional
        True labels for ``X_test``. When omitted, the module-level variable
        ``y_test`` is used, which is what this function read before the
        parameter existed. Pass it explicitly so the score does not depend on
        whatever happens to be in the notebook namespace.

    Returns
    -------
    The accuracy computed by ``accuracy_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward-compatible fallback for three-argument callers that relied
        # on the global; keep the NameError such callers would have seen.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test explicitly"
            ) from None

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)
    return accuracy_prediction(classifier, X_test, y_test)

def SVM_linear(X_train, y_train, X_test, y_test=None):
    """Fit a linear-kernel SVC and return its test accuracy.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test : features the fitted model is scored on.
    y_test : optional
        True labels for ``X_test``. When omitted, the module-level variable
        ``y_test`` is used, which is what this function read before the
        parameter existed. Pass it explicitly so the score does not depend on
        whatever happens to be in the notebook namespace.

    Returns
    -------
    The accuracy computed by ``accuracy_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward-compatible fallback for three-argument callers that relied
        # on the global; keep the NameError such callers would have seen.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test explicitly"
            ) from None

    classifier = SVC(kernel='linear')
    classifier.fit(X_train, y_train)
    return accuracy_prediction(classifier, X_test, y_test)

def SVM_NL(X_train, y_train, X_test, y_test=None):
    """Fit an RBF-kernel (non-linear) SVC and return its test accuracy.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test : features the fitted model is scored on.
    y_test : optional
        True labels for ``X_test``. When omitted, the module-level variable
        ``y_test`` is used, which is what this function read before the
        parameter existed. Pass it explicitly so the score does not depend on
        whatever happens to be in the notebook namespace.

    Returns
    -------
    The accuracy computed by ``accuracy_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward-compatible fallback for three-argument callers that relied
        # on the global; keep the NameError such callers would have seen.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test explicitly"
            ) from None

    classifier = SVC(kernel='rbf')
    classifier.fit(X_train, y_train)
    return accuracy_prediction(classifier, X_test, y_test)

def Decision(X_train, y_train, X_test, y_test=None):
    """Fit a decision-tree classifier (``random_state=0``) and return its test accuracy.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test : features the fitted model is scored on.
    y_test : optional
        True labels for ``X_test``. When omitted, the module-level variable
        ``y_test`` is used, which is what this function read before the
        parameter existed. Pass it explicitly so the score does not depend on
        whatever happens to be in the notebook namespace.

    Returns
    -------
    The accuracy computed by ``accuracy_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward-compatible fallback for three-argument callers that relied
        # on the global; keep the NameError such callers would have seen.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test explicitly"
            ) from None

    classifier = DecisionTreeClassifier(random_state=0)
    classifier.fit(X_train, y_train)
    return accuracy_prediction(classifier, X_test, y_test)

def Random(X_train, y_train, X_test, y_test=None):
    """Fit a 10-tree random forest (``random_state=0``) and return its test accuracy.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test : features the fitted model is scored on.
    y_test : optional
        True labels for ``X_test``. When omitted, the module-level variable
        ``y_test`` is used, which is what this function read before the
        parameter existed. Pass it explicitly so the score does not depend on
        whatever happens to be in the notebook namespace.

    Returns
    -------
    The accuracy computed by ``accuracy_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward-compatible fallback for three-argument callers that relied
        # on the global; keep the NameError such callers would have seen.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test explicitly"
            ) from None

    classifier = RandomForestClassifier(n_estimators=10, random_state=0)
    classifier.fit(X_train, y_train)
    return accuracy_prediction(classifier, X_test, y_test)

def backward_selection_classification(indep_X, dep_Y, threshold=0.85,
                                      make_classifier=LogisticRegression):
    """Greedy backward feature elimination scored by hold-out accuracy.

    Starting from every column of ``indep_X``, each round refits a classifier
    once per remaining feature with that feature left out. The feature whose
    removal gives the highest accuracy is dropped, provided that accuracy is
    at least ``threshold``; otherwise elimination stops. At least one feature
    is always kept.

    Parameters
    ----------
    indep_X : object exposing ``.columns`` and column-list indexing
        (e.g. a pandas DataFrame) holding the candidate predictors.
    dep_Y : target labels, forwarded unchanged to ``split_scalar``.
    threshold : float, default 0.85
        Minimum accuracy the reduced feature set must reach for the
        elimination to continue.
    make_classifier : callable, default ``LogisticRegression``
        Zero-argument factory returning a fresh, unfitted estimator that
        provides ``fit`` and ``predict``.

    Returns
    -------
    list
        Names of the columns that survived elimination.

    Notes
    -----
    Candidates are scored on the held-out fold returned by ``split_scalar``,
    which uses a fixed ``random_state``. An evaluation that later reuses the
    same split is therefore not independent of the selection step.
    """
    selected_features = list(indep_X.columns)

    while len(selected_features) > 1:
        accuracy_list = []
        for feature in selected_features:
            remaining_features = [feat for feat in selected_features if feat != feature]
            X_train, X_test, y_train, y_test = split_scalar(indep_X[remaining_features], dep_Y)

            classifier = make_classifier()
            classifier.fit(X_train, y_train)
            # Score against the labels of *this* split. Going through a helper
            # that looks up a notebook-level ``y_test`` would fail on a fresh
            # kernel or silently use stale labels.
            accuracy = accuracy_prediction(classifier, X_test, y_test)
            accuracy_list.append((feature, accuracy))

        # ``max`` returns the first maximal entry, which matches taking the
        # head of a stable descending sort when accuracies tie.
        best_feature, best_accuracy = max(accuracy_list, key=lambda item: item[1])

        if best_accuracy < threshold:
            break
        selected_features.remove(best_feature)

    return selected_features

# Load the preprocessed data and one-hot encode it; drop_first=True drops the
# first level of every encoded categorical column.
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# Separate the target column from the predictor columns.
dep_Y = df2['classification_yes']
indep_X = df2.drop("classification_yes", axis=1)

# Reduce the predictors by backward elimination, then build the single
# train/test split on which every model below is fitted and scored.
selected_features = backward_selection_classification(indep_X, dep_Y)
X_train, X_test, y_train, y_test = split_scalar(df2[selected_features], dep_Y)

# Fit the five models in the same order as the 'Model' column of the table.
(
    accuracy_logistic,
    accuracy_svm_linear,
    accuracy_svm_nl,
    accuracy_decision_tree,
    accuracy_random_forest,
) = [
    fit_and_score(X_train, y_train, X_test)
    for fit_and_score in (Logistic, SVM_linear, SVM_NL, Decision, Random)
]

# Collect the scores into a two-column summary table.
result = pd.DataFrame(
    {
        'Model': [
            'Logistic Regression',
            'SVM Linear',
            'SVM Non-Linear',
            'Decision Tree',
            'Random Forest',
        ],
        'Accuracy': [
            accuracy_logistic,
            accuracy_svm_linear,
            accuracy_svm_nl,
            accuracy_decision_tree,
            accuracy_random_forest,
        ],
    }
)





In [12]:
# Display the model/accuracy comparison table built in the previous cell.
result

Unnamed: 0,Model,Accuracy
0,Logistic Regression,0.89
1,SVM Linear,0.89
2,SVM Non-Linear,0.89
3,Decision Tree,0.89
4,Random Forest,0.89
