In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
def split_scalar(indep_x, dep_Y):
    """Hold out 30% of the rows for testing and standardize the features.

    The scaler is fitted on the training features only and then reused,
    unchanged, to transform the test features.

    Args:
        indep_x: Feature matrix.
        dep_Y: Target values aligned with ``indep_x``.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)`` where both feature
        sets have been scaled.
    """
    features_train, features_test, target_train, target_test = train_test_split(
        indep_x, dep_Y, test_size=0.30, random_state=0
    )
    scaler = StandardScaler()
    # Fit on the training portion, then apply the same scaling to the test portion
    scaled_train = scaler.fit_transform(features_train)
    scaled_test = scaler.transform(features_test)
    return scaled_train, scaled_test, target_train, target_test

In [3]:
def lda(indep_x, dep_Y, n):
    """Project the features with Linear Discriminant Analysis.

    Args:
        indep_x: Feature matrix.
        dep_Y: Class labels used to fit the projection.
        n: Number of discriminant components to keep.

    Returns:
        The features transformed by the fitted LDA.
    """
    reducer = LDA(n_components=n)
    return reducer.fit_transform(indep_x, dep_Y)

In [4]:
def cm_prediction(classifier, X_test, y_test):
    """Score a fitted classifier on the test set.

    Args:
        classifier: Object exposing ``predict``; it is returned unchanged.
        X_test: Test features.
        y_test: True labels for ``X_test``.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` holding the accuracy
        score, the text classification report and the confusion matrix.
    """
    predictions = classifier.predict(X_test)
    return (
        classifier,
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
        confusion_matrix(y_test, predictions),
    )


In [5]:
def logistic(X_train, y_train, X_test, y_test):
    """Fit logistic regression on the training data and score it on the test data.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` from ``cm_prediction``.
    """
    model = LogisticRegression(random_state=0).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)


In [6]:
def svm_linear(X_train, y_train, X_test, y_test):
    """Fit a linear-kernel SVC on the training data and score it on the test data.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` from ``cm_prediction``.
    """
    model = SVC(kernel='linear', random_state=0).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

In [7]:
def svm_NL(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVC on the training data and score it on the test data.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` from ``cm_prediction``.
    """
    model = SVC(kernel='rbf', random_state=0).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)


In [8]:
def Navie(X_train, y_train, X_test, y_test):
    """Fit Gaussian Naive Bayes on the training data and score it on the test data.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` from ``cm_prediction``.
    """
    model = GaussianNB().fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

In [9]:
def knn(X_train, y_train, X_test, y_test):
    """Fit a 5-nearest-neighbours classifier (Minkowski, p=2) and score it on the test data.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` from ``cm_prediction``.
    """
    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2).fit(
        X_train, y_train
    )
    return cm_prediction(model, X_test, y_test)


In [10]:
def Decision(X_train, y_train, X_test, y_test):
    """Fit an entropy-criterion decision tree and score it on the test data.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` from ``cm_prediction``.
    """
    model = DecisionTreeClassifier(criterion='entropy', random_state=0).fit(
        X_train, y_train
    )
    return cm_prediction(model, X_test, y_test)


In [11]:
def random(X_train, y_train, X_test, y_test):
    """Fit a 10-tree, entropy-criterion random forest and score it on the test data.

    Returns:
        tuple: ``(classifier, accuracy, report, cm)`` from ``cm_prediction``.
    """
    model = RandomForestClassifier(
        n_estimators=10, criterion='entropy', random_state=0
    ).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)


In [12]:
def selectk_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf):
    """Collect per-classifier accuracies into a one-row summary table.

    Each argument is a sequence of accuracy scores; the entry at position
    ``i`` fills the ``i``-th row of the table. The table has a single row
    labelled ``'LDA'``, so only position 0 of every sequence is read.

    Args:
        acclog: Logistic regression accuracies.
        accsvml: Linear SVM accuracies.
        accsvmnl: Non-linear SVM accuracies.
        accknn: KNN accuracies.
        accnav: Naive Bayes accuracies.
        accdes: Decision tree accuracies.
        accrf: Random forest accuracies.

    Returns:
        pandas.DataFrame: index ``['LDA']`` and columns ``Logistic``,
        ``SVMl``, ``SVMnl``, ``KNN``, ``Navie``, ``Decision``, ``Random``.

    Raises:
        IndexError: If any sequence is empty.
    """
    scores_by_column = {
        'Logistic': acclog,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'KNN': accknn,
        'Navie': accnav,
        'Decision': accdes,
        'Random': accrf,
    }
    dataframe = pd.DataFrame(index=['LDA'], columns=list(scores_by_column))
    for number, idx in enumerate(dataframe.index):
        for column, scores in scores_by_column.items():
            # Single .loc write: chained `dataframe[col][idx] = ...` assigns to a
            # temporary and does not update the frame under pandas Copy-on-Write.
            dataframe.loc[idx, column] = scores[number]
    return dataframe

In [13]:
# Load dataset
dataset1 = pd.read_csv("Car_Rating data.csv", index_col=None)
# Features are every column except the last one; the last column is the target.
# The earlier hard-coded 0:13 slice ran past the frame's 11 columns, so it
# silently pulled the target column into X as well.
X = dataset1.iloc[:, :-1].values
y = dataset1.iloc[:, -1].values

In [14]:
# One-hot encode the categorical columns, dropping the first level of each
df1 = pd.get_dummies(data=dataset1, drop_first=True)
df1

Unnamed: 0,Index,Overall Rating,Exterior,Interior,Ride Quality,Airbags,Price.1,Car_Audi A4,Car_Audi Q3,Car_BMW 2 Series Gran Coupe,...,Color_Red,Color_Silver,Color_Sky blue,Color_White,Color_Yellow,Fuel type_Diesel,Fuel type_Hybrid,Fuel type_LPG,Fuel type_Petrol,Fuel type_Plug-in Hybrid
0,0,4.4,4.4,4.4,4.4,12,13328,False,False,False,...,False,True,False,False,False,False,True,False,False,False
1,1,4.2,4.0,3.8,4.2,8,16621,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,2,3.5,2.7,3.0,2.3,2,8467,False,False,False,...,False,False,False,False,False,False,False,False,True,False
3,3,4.4,4.4,4.4,4.4,0,3607,False,False,False,...,False,False,False,True,False,False,True,False,False,False
4,4,4.2,4.0,3.8,4.2,4,11726,False,False,False,...,False,True,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,167,0.0,,,,4,20005,False,False,False,...,False,False,False,True,False,False,False,False,True,False
168,168,0.0,,,,8,36110,False,False,False,...,False,True,False,False,False,False,False,False,True,False
169,169,0.0,,,,4,7840,False,False,False,...,False,False,False,False,False,False,False,False,True,False
170,170,0.0,,,,12,470,False,False,False,...,False,False,False,False,False,False,False,False,True,False


In [15]:
# Column names of the raw frame as loaded from the CSV (before one-hot encoding)
print(dataset1.columns)

Index(['Index', 'Car', 'Price', 'Overall Rating', 'Exterior', 'Interior',
       'Ride Quality', 'Color', 'Airbags', 'Fuel type', 'Price.1'],
      dtype='object')


In [16]:
# (rows, columns) of the raw frame as loaded from the CSV
print(dataset1.shape)

(172, 11)


In [18]:
# Replace missing sub-ratings with 0. The filled columns are assigned back
# rather than calling fillna(inplace=True) on a column selection: that chained
# form is deprecated and leaves df1 unchanged under pandas Copy-on-Write.
rating_columns = ["Exterior", "Interior", "Ride Quality"]
df1[rating_columns] = df1[rating_columns].fillna(0)

In [38]:
# Split the preprocessed frame into features (everything except 'Price.1')
# and the target column ('Price.1').
# NOTE(review): the displayed 'Price.1' values look like raw prices; confirm
# that using them directly as class labels is intended.
dep_Y = df1['Price.1']
indep_x = df1.drop(columns='Price.1')

In [90]:
# Project the features onto a single discriminant component.
# The estimator gets its own name so this cell does not overwrite the
# `lda(indep_x, dep_Y, n)` helper function defined earlier in the notebook.
# NOTE(review): the projection is fitted on all rows, before the train/test
# split performed later; confirm that this is acceptable for the evaluation.
lda_model = LDA(n_components=1)
indep_x_lda = lda_model.fit_transform(indep_x, dep_Y)

In [91]:
# Fresh, unfitted single-component LDA; this rebinds the name `lda`
lda = LDA(n_components=1)
# One independent, empty accuracy list per classifier
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = ([] for _ in range(7))
lda

In [92]:
# Split and scale the LDA-projected features
x_train, x_test, y_train, y_test = split_scalar(indep_x_lda, dep_Y)

# Reset the accumulators in this cell: selectk_Classification reads only
# position 0 of each list, so appending to lists left over from an earlier
# run would make a re-run of this cell report the old scores.
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = ([] for _ in range(7))

# Fit and score every classifier on the same split, in the original order
for fit_and_score, scores in (
    (logistic, acclog),
    (svm_linear, accsvml),
    (svm_NL, accsvmnl),
    (knn, accknn),
    (Navie, accnav),
    (Decision, accdes),
    (random, accrf),
):
    classifier, accuracy, report, cm = fit_and_score(x_train, y_train, x_test, y_test)
    scores.append(accuracy)

# Get results
result = selectk_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)
print(result)

     Logistic      SVMl     SVMnl       KNN     Navie  Decision    Random
LDA  0.057692  0.057692  0.038462  0.057692  0.076923  0.076923  0.057692


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [77]:
result
# Run label 5 — presumably the LDA n_components=5 run; TODO confirm (the cell that produced these scores is not visible here)

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
LDA,0.076923,0.115385,0.057692,0.115385,0.115385,0.173077,0.173077


# Decision Tree and Random Forest show the highest performance with a score of 0.173077.
# SVM (linear), KNN, and Naive Bayes have a performance score of 0.115385.
# Logistic Regression and SVM (nonlinear) score lowest, at 0.076923 and 0.057692 respectively.

In [81]:
result
# Run label 4 — presumably the LDA n_components=4 run; TODO confirm (the cell that produced these scores is not visible here)

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
LDA,0.076923,0.096154,0.076923,0.211538,0.115385,0.153846,0.173077


# KNN shows the highest performance with a score of 0.211538.
# Random Forest has a score of 0.173077.
# Decision Tree has a score of 0.153846.
# Naive Bayes (0.115385), SVM (linear) (0.096154), and Logistic and SVM (nonlinear) (0.076923 each) all score below KNN.

In [85]:
result
# Run label 3 — presumably the LDA n_components=3 run; TODO confirm (the cell that produced these scores is not visible here)

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
LDA,0.076923,0.096154,0.076923,0.211538,0.134615,0.192308,0.134615


# KNN shows the highest performance with a score of 0.211538.
# Decision Tree follows with a score of 0.192308.
# Naive Bayes and Random Forest have scores of 0.134615.
# SVM (linear) scores 0.096154; SVM (nonlinear) and Logistic score lowest, at 0.076923 each.

In [89]:
result
# Run label 2 — presumably the LDA n_components=2 run; TODO confirm (the cell that produced these scores is not visible here)

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
LDA,0.038462,0.076923,0.076923,0.192308,0.134615,0.115385,0.096154


# KNN shows the highest performance with a score of 0.192308.
# Naive Bayes follows with a score of 0.134615.
# SVM (linear) and SVM (nonlinear) both score 0.076923; Logistic is lowest at 0.038462.

In [93]:
result
# Run label 1 — these scores match the table printed by the n_components=1 evaluation cell above

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
LDA,0.057692,0.057692,0.038462,0.057692,0.076923,0.076923,0.057692


# Naive Bayes and Decision Tree show the highest performance with a score of 0.076923.
# Logistic, SVM (linear), KNN, and Random Forest have a performance score of 0.057692.
# SVM (nonlinear) has the lowest score of 0.038462.