<a href="https://colab.research.google.com/github/ArtfulMonster/Machine-Learning/blob/main/HW4_Q2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load the housing data set
Housing = pd.read_csv("Housing.csv")
varlist = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
Housing[varlist] = Housing[varlist].apply(lambda x: x.map({'yes': 1, 'no':0}))


X = Housing[['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea']]
y = Housing['price']


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state= 0)

# Standardize the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize lists to store metric values for different K
K_values = list(range(1, min(X_train.shape[0], X_train.shape[1])))
accuracies = []
precisions = []
recalls = []

best_k = 0
best_accuracy = 0

for k in K_values:
    # Apply PCA
    pca = PCA(n_components=k)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Train an SVR classifier
    clf = SVC()
    clf.fit(X_train_pca, y_train)

    # Evaluate the classifier
    y_pred = clf.predict(X_test_pca)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)

    # Update the best K if accuracy is improved
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_k = k

# Output the best K
print(f"Best K: {best_k}")

# Plot the metrics over different K values
plt.figure(figsize=(10, 6))
plt.plot(K_values, accuracies, label='Accuracy', marker='o')
plt.plot(K_values, precisions, label='Precision', marker='o')
plt.plot(K_values, recalls, label='Recall', marker='o')
plt.xlabel('Number of Principal Components (K)')
plt.ylabel('Metrics')
plt.title('SVR Classifier Metrics vs. Number of Principal Components (K)')
plt.legend()
plt.grid(True)
plt.show()



ValueError: ignored