In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def rfeFeature(indep_X, dep_Y, n):
    """Performs Recursive Feature Elimination (RFE) with different models."""
    rfelist = []
    
    models = [
        LogisticRegression(solver='lbfgs'),
        SVC(kernel='linear', random_state=0),
        RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
        DecisionTreeClassifier(criterion='gini', max_features='sqrt', splitter='best', random_state=0)
    ]
    
    for model in models:
        rfe = RFE(model, n_features_to_select=n)
        rfe.fit(indep_X, dep_Y)
        selected_features = indep_X.loc[:, rfe.support_]
        rfelist.append(selected_features)
    
    return rfelist

def split_scalar(indep_X, dep_Y):
    """Splits data into training and testing sets and applies feature scaling."""
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    return X_train, X_test, y_train, y_test

def cm_prediction(classifier, X_test, y_test):
    """Predicts and evaluates the model performance."""
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    
    return classifier, accuracy, report, X_test, y_test, cm

def train_model(classifier, X_train, y_train, X_test, y_test):
    """Trains a classifier and evaluates its performance."""
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def logistic(X_train, y_train, X_test, y_test):
    return train_model(LogisticRegression(random_state=0), X_train, y_train, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test):
    return train_model(SVC(kernel='linear', random_state=0), X_train, y_train, X_test, y_test)

def svm_nl(X_train, y_train, X_test, y_test):
    return train_model(SVC(kernel='rbf', random_state=0), X_train, y_train, X_test, y_test)

def naive_bayes(X_train, y_train, X_test, y_test):
    return train_model(GaussianNB(), X_train, y_train, X_test, y_test)

def knn(X_train, y_train, X_test, y_test):
    return train_model(KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2), X_train, y_train, X_test, y_test)

def decision_tree(X_train, y_train, X_test, y_test):
    return train_model(DecisionTreeClassifier(criterion='entropy', random_state=0), X_train, y_train, X_test, y_test)

def random_forest(X_train, y_train, X_test, y_test):
    return train_model(RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0), X_train, y_train, X_test, y_test)

def rfe_classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf): 
    """Creates a DataFrame comparing accuracy results of different models."""
    return pd.DataFrame({
        'Logistic': acclog,
        'SVM (Linear)': accsvml,
        'SVM (RBF)': accsvmnl,
        'KNN': accknn,
        'Naive Bayes': accnav,
        'Decision Tree': accdes,
        'Random Forest': accrf
    }, index=['Logistic', 'SVM', 'Random Forest', 'DecisionTree'])

# Load and preprocess dataset
dataset = pd.read_csv("new.csv")
df = pd.get_dummies(dataset, drop_first=True)

indep_X = df.drop('Price (in USD)', axis=1)
dep_Y = df['Price (in USD)']

# Perform Recursive Feature Elimination
rfelist = rfeFeature(indep_X, dep_Y, 3)

# Initialize accuracy lists
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = [], [], [], [], [], [], []

# Train models on each RFE-selected feature subset
for selected_X in rfelist:   
    X_train, X_test, y_train, y_test = split_scalar(selected_X, dep_Y)
    
    _, acc, _, _, _, _ = logistic(X_train, y_train, X_test, y_test)
    acclog.append(acc)
    
    _, acc, _, _, _, _ = svm_linear(X_train, y_train, X_test, y_test)
    accsvml.append(acc)
    
    _, acc, _, _, _, _ = svm_nl(X_train, y_train, X_test, y_test)
    accsvmnl.append(acc)
    
    _, acc, _, _, _, _ = knn(X_train, y_train, X_test, y_test)
    accknn.append(acc)
    
    _, acc, _, _, _, _ = naive_bayes(X_train, y_train, X_test, y_test)
    accnav.append(acc)
    
    _, acc, _, _, _, _ = decision_tree(X_train, y_train, X_test, y_test)
    accdes.append(acc)
    
    _, acc, _, _, _, _ = random_forest(X_train, y_train, X_test, y_test)
    accrf.append(acc)

# Generate results
result = rfe_classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)

result


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

               Logistic  SVM (Linear)  SVM (RBF)       KNN  Naive Bayes  \
Logistic       0.224719      0.241573   0.224719  0.264045     0.292135   
SVM            0.196629      0.241573   0.241573  0.235955     0.196629   
Random Forest  0.224719      0.241573   0.224719  0.264045     0.292135   
DecisionTree   0.224719      0.241573   0.224719  0.264045     0.292135   

               Decision Tree  Random Forest  
Logistic            0.297753       0.314607  
SVM                 0.275281       0.275281  
Random Forest       0.297753       0.314607  
DecisionTree        0.297753       0.314607  


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
result


Unnamed: 0,Logistic,SVM (Linear),SVM (RBF),KNN,Naive Bayes,Decision Tree,Random Forest
Logistic,0.224719,0.241573,0.224719,0.264045,0.292135,0.297753,0.314607
SVM,0.196629,0.241573,0.241573,0.235955,0.196629,0.275281,0.275281
Random Forest,0.224719,0.241573,0.224719,0.264045,0.292135,0.297753,0.314607
DecisionTree,0.224719,0.241573,0.224719,0.264045,0.292135,0.297753,0.314607
