### **Importing Libraries**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive so the dataset archive stored under
# MyDrive can be read by the cells below.
# NOTE(review): google.colab is provided by the Colab runtime, so this cell is
# expected to work only there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### **Loading Dataset:**

In [None]:
import os
import zipfile

# Source archive on the mounted Drive and the folder its contents go into.
zip_file_path = '/content/drive/MyDrive/car_eval.zip'
extract_to_directory = '/content/drive/MyDrive/car_eval'

# Create the destination folder; exist_ok avoids an error when it is already there.
os.makedirs(extract_to_directory, exist_ok=True)

with zipfile.ZipFile(zip_file_path) as zip_ref:
    # Show the archive members as a quick sanity check.
    print("Files in ZIP:", zip_ref.namelist())

    # Pull out only the dataset CSV (not the whole archive).
    zip_ref.extract('car_eval_dataset.csv', path=extract_to_directory)

print(f"car_eval_dataset.csv has been extracted to {extract_to_directory}")


Files in ZIP: ['car_eval_dataset.csv']
car_eval_dataset.csv has been extracted to /content/drive/MyDrive/car_eval


In [None]:
# Read the extracted CSV into the working DataFrame `df`.
# NOTE(review): the file appears to carry a saved index column -- 'Unnamed: 0'
# shows up in the column listing further down and is dropped in a later cell.
csv_file_path = '/content/drive/MyDrive/car_eval/car_eval_dataset.csv'
df = pd.read_csv(csv_file_path)

### **Examining Dataset:**

In [None]:
# Preview the first five rows to confirm the CSV parsed as expected.
df.head()

Unnamed: 0.1,Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,0,vhigh,vhigh,2,2,small,low,unacc
1,1,vhigh,vhigh,2,2,small,med,unacc
2,2,vhigh,vhigh,2,2,small,high,unacc
3,3,vhigh,vhigh,2,2,med,low,unacc
4,4,vhigh,vhigh,2,2,med,med,unacc


In [None]:
# Second preview of the first five rows.
# NOTE(review): same call as the earlier preview, but the recorded output no
# longer shows the 'Unnamed: 0' column, which suggests this cell was executed
# after the column was dropped further down -- confirm the execution order.
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [None]:
# Summarise the categorical (object-dtype) columns: count, number of unique
# values, most frequent value and its frequency.
# include='object' is explicit because describe() defaults to numeric columns
# only; while the integer 'Unnamed: 0' column is still present (it is dropped
# in a later cell) the default call would summarise just that column instead
# of the features.
df.describe(include='object')

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
count,1728,1728,1728,1728,1728,1728,1728
unique,4,4,4,3,3,3,4
top,vhigh,vhigh,2,2,small,low,unacc
freq,432,432,432,576,576,576,1210


In [None]:
# Print column dtypes, non-null counts and memory usage.
# NOTE(review): the recorded output lists 7 columns without 'Unnamed: 0', so it
# looks like it was captured after that column had been dropped -- confirm.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   buying    1728 non-null   object
 1   maint     1728 non-null   object
 2   doors     1728 non-null   object
 3   persons   1728 non-null   object
 4   lug_boot  1728 non-null   object
 5   safety    1728 non-null   object
 6   class     1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB


In [None]:
# Count missing values per column (the recorded output shows zero for every column).
df.isna().sum()

Unnamed: 0,0
buying,0
maint,0
doors,0
persons,0
lug_boot,0
safety,0
class,0


In [None]:
# List the column labels; the recorded output still includes the leftover
# 'Unnamed: 0' column.
print(df.columns)


Index(['Unnamed: 0', 'buying', 'maint', 'doors', 'persons', 'lug_boot',
       'safety', 'class'],
      dtype='object')


In [None]:
# Remove the leftover index column that came in with the CSV.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# is already gone is a no-op instead of raising KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

### **Converting Categorical variables:**

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Feature columns to expand into indicator columns; the target 'class' is not
# in this list and is left untouched.
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'safety', 'lug_boot']

# sparse_output=False makes the encoder return a dense NumPy array.
ohe = OneHotEncoder(sparse_output=False)

# Encode all categorical columns in one pass instead of concatenating and
# dropping inside a per-column loop. The resulting column order is the same:
# indicator columns grouped per feature, in the order listed above.
encoded_data = ohe.fit_transform(df[categorical_columns])
encoded_df = pd.DataFrame(
    encoded_data,
    columns=ohe.get_feature_names_out(),
    index=df.index,  # align on df's row labels so concat cannot misalign rows
)

# Replace the raw categorical columns with their one-hot counterparts.
df = pd.concat([df.drop(columns=categorical_columns), encoded_df], axis=1)

# Show a short preview rather than printing the whole frame.
df.head()

      class  buying_high  buying_low  buying_med  buying_vhigh  maint_high  \
0     unacc          0.0         0.0         0.0           1.0         0.0   
1     unacc          0.0         0.0         0.0           1.0         0.0   
2     unacc          0.0         0.0         0.0           1.0         0.0   
3     unacc          0.0         0.0         0.0           1.0         0.0   
4     unacc          0.0         0.0         0.0           1.0         0.0   
...     ...          ...         ...         ...           ...         ...   
1723   good          0.0         1.0         0.0           0.0         0.0   
1724  vgood          0.0         1.0         0.0           0.0         0.0   
1725  unacc          0.0         1.0         0.0           0.0         0.0   
1726   good          0.0         1.0         0.0           0.0         0.0   
1727  vgood          0.0         1.0         0.0           0.0         0.0   

      maint_low  maint_med  maint_vhigh  doors_2  ...  doors_5m

In [None]:
# Separate predictors from the target: X keeps every column except 'class',
# Y holds the 'class' labels.
X, Y = df.drop(columns=['class']), df['class']

### **Splitting dataset:**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Step 1: keep 70% of the rows for training and set the other 30% aside.
# The target is imbalanced (the describe() output shows 'unacc' on 1210 of 1728
# rows), so stratify on Y to keep class proportions the same in every split.
X_train, X_remaining, Y_train, Y_remaining = train_test_split(
    X, Y, test_size=0.3, random_state=42, stratify=Y
)

# Step 2: split the held-out 30% into validation (20% of the total) and
# test (10% of the total), again stratified on the labels.
X_val, X_test, Y_val, Y_test = train_test_split(
    X_remaining, Y_remaining, test_size=1/3, random_state=42, stratify=Y_remaining
)

# Report the size of each split.
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Validation set size: {X_val.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")


Training set size: 1209 samples
Validation set size: 346 samples
Test set size: 173 samples


### **Applying SVM:**

In [None]:
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score

# Standardise the features. The scaler is fitted on the training split only and
# then applied unchanged to the validation and test splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel SVM; fitted on the scaled training data in the next cell.
model = SVC(kernel='linear', random_state=42)

# 10-fold cross-validation on the training split. The scaler lives inside the
# pipeline so it is re-fitted on each fold's training part only; cross-validating
# on X_train_scaled would let every held-out fold influence the scaling.
# cross_val_score works on clones, so `model` itself is still unfitted afterwards.
cv_scores = cross_val_score(make_pipeline(StandardScaler(), model), X_train, Y_train, cv=10)
print(f"SVM - 10-fold CV accuracy: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})")


In [None]:
# Train the SVM on the scaled training split, then score it on that same data.
# fit() returns the estimator itself, so the two calls can be chained.
train_accuracy = model.fit(X_train_scaled, Y_train).score(X_train_scaled, Y_train)

print(f"Training accuracy: {train_accuracy}")

Training accuracy: 0.9511993382961125


In [None]:
# Accuracy of the fitted SVM on the held-out validation and test splits.
val_accuracy, test_accuracy = (
    model.score(X_val_scaled, Y_val),
    model.score(X_test_scaled, Y_test),
)

print(f"Validation accuracy: {val_accuracy}")
print(f"Test accuracy: {test_accuracy}")

Validation accuracy: 0.9335260115606936
Test accuracy: 0.9248554913294798


### **Applying Logistic Regression:**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier  # used by the Decision Tree cell below

# Define the Logistic Regression model
log_reg_model = LogisticRegression(random_state=42)

# 10-fold cross-validation on the training split. Scaling happens inside the
# pipeline so each fold's held-out part does not influence the scaler, and the
# scores are reported instead of being computed and discarded.
# cross_val_score works on clones, so log_reg_model is still unfitted afterwards.
log_reg_cv_scores = cross_val_score(
    make_pipeline(StandardScaler(), log_reg_model), X_train, Y_train, cv=10
)
print(
    "Logistic Regression - 10-fold CV accuracy: "
    f"{log_reg_cv_scores.mean():.4f} (+/- {log_reg_cv_scores.std():.4f})"
)

# Fit on the full scaled training split.
log_reg_model.fit(X_train_scaled, Y_train)

# Accuracy on the training, validation and test splits.
print(f"Logistic Regression - Training accuracy: {log_reg_model.score(X_train_scaled, Y_train)}")
log_reg_val_accuracy = log_reg_model.score(X_val_scaled, Y_val)
log_reg_test_accuracy = log_reg_model.score(X_test_scaled, Y_test)

print(f"Logistic Regression - Validation accuracy: {log_reg_val_accuracy}")
print(f"Logistic Regression - Test accuracy: {log_reg_test_accuracy}")



Logistic Regression - Training accuracy: 0.9404466501240695
Logistic Regression - Validation accuracy: 0.9277456647398844
Logistic Regression - Test accuracy: 0.930635838150289


### **Applying Decision Trees**

In [None]:
# Define the Decision Tree model
decision_tree_model = DecisionTreeClassifier(random_state=42)

# 10-fold cross-validation on the training split; report the scores so they
# can be compared with the other models instead of being silently discarded.
decision_tree_cv_scores = cross_val_score(decision_tree_model, X_train_scaled, Y_train, cv=10)
print(
    "Decision Tree - 10-fold CV accuracy: "
    f"{decision_tree_cv_scores.mean():.4f} (+/- {decision_tree_cv_scores.std():.4f})"
)

# Fit on the full scaled training split.
decision_tree_model.fit(X_train_scaled, Y_train)

# Accuracy on the training, validation and test splits.
print(f"Decision Tree - Training accuracy: {decision_tree_model.score(X_train_scaled, Y_train)}")
decision_tree_val_accuracy = decision_tree_model.score(X_val_scaled, Y_val)
decision_tree_test_accuracy = decision_tree_model.score(X_test_scaled, Y_test)

print(f"Decision Tree - Validation accuracy: {decision_tree_val_accuracy}")
print(f"Decision Tree - Test accuracy: {decision_tree_test_accuracy}")




Decision Tree - Training accuracy: 1.0
Decision Tree - Validation accuracy: 0.9710982658959537
Decision Tree - Test accuracy: 0.953757225433526
