<a href="https://colab.research.google.com/github/Christinathattil/AML/blob/Main/lab_6_PCA%2CLDA%2CKNN%2CDecisionTree%2CRandomForest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer, load_iris, load_diabetes
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

In [None]:
# Load the three scikit-learn datasets that the rest of the notebook works on.
breast_cancer_data, iris_data, diabetes_data = (
    load_breast_cancer(),
    load_iris(),
    load_diabetes(),
)

In [None]:
# Apply PCA
def apply_pca(X, n_components):
    """Project ``X`` onto its leading principal components.

    A fresh ``PCA`` is fitted on ``X`` itself and used to transform it.
    The per-component explained variance ratio is printed as a side effect.

    Parameters
    ----------
    X : array-like
        Feature matrix handed to ``PCA.fit_transform``.
    n_components : int
        Number of components, forwarded to ``PCA``.

    Returns
    -------
    The transformed data returned by ``PCA.fit_transform``.
    """
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X)
    # The printed values are pca.explained_variance_ratio_, so label them as such.
    print("Explained variance ratio:", pca.explained_variance_ratio_)
    return X_pca

In [None]:
# Apply LDA
def apply_lda(X, y, n_components):
    """Fit a LinearDiscriminantAnalysis on ``(X, y)`` and return ``X`` transformed by it.

    ``n_components`` is forwarded unchanged to the estimator's constructor.
    """
    reducer = LinearDiscriminantAnalysis(n_components=n_components)
    return reducer.fit_transform(X, y)

In [None]:
# Apply classification algorithms
def apply_classifiers(X_train, X_test, y_train, y_test, random_state=42):
    """Fit KNN, decision-tree and random-forest classifiers and print test metrics.

    Each classifier is fitted on ``(X_train, y_train)`` and used to predict
    ``X_test``. Accuracy plus weighted precision, recall and F1 against
    ``y_test`` are printed for each one.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices passed to ``fit`` and ``predict`` respectively.
    y_train, y_test : array-like
        Labels matching ``X_train`` and ``X_test``.
    random_state : int or None, default=42
        Seed given to the decision tree and the random forest so that
        re-running the cell prints the same numbers. Pass ``None`` to
        leave both estimators unseeded.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    classifiers = {
        "KNN": KNeighborsClassifier(),
        "Decision Tree": DecisionTreeClassifier(random_state=random_state),
        "Random Forest": RandomForestClassifier(random_state=random_state),
    }

    # zero_division=0 reports 0 for undefined per-class scores without
    # emitting a warning for every such class.
    weighted = {"average": "weighted", "zero_division": 0}

    for name, clf in classifiers.items():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print(f"\nMetrics for {name}:")
        print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
        print("Precision:", metrics.precision_score(y_test, y_pred, **weighted))
        print("Recall:", metrics.recall_score(y_test, y_pred, **weighted))
        print("F1 Score:", metrics.f1_score(y_test, y_pred, **weighted))

In [None]:
# Breast Cancer dataset: standardise the features, then build the PCA (2-D)
# and LDA (1-D) representations from the standardised matrix.
# NOTE(review): the scaler, PCA and LDA are all fitted on the full dataset,
# i.e. before the later train_test_split, so held-out rows (and, for LDA,
# their labels) influence the transforms.
y_bc = breast_cancer_data.target
X_bc = StandardScaler().fit_transform(breast_cancer_data.data)
X_bc_pca = apply_pca(X_bc, n_components=2)
X_bc_lda = apply_lda(X_bc, y_bc, n_components=1)


Variance of coefficients: [0.44272026 0.18971182]


In [None]:
# Split all three feature representations and the labels in one call so that
# they share the same 80/20 partition (seed 42).
(
    X_bc_train, X_bc_test,
    X_bc_pca_train, X_bc_pca_test,
    X_bc_lda_train, X_bc_lda_test,
    y_bc_train, y_bc_test,
) = train_test_split(X_bc, X_bc_pca, X_bc_lda, y_bc, test_size=0.2, random_state=42)


In [None]:
print("\nBreast Cancer Dataset:")
# Label each run so the three metric listings can be told apart in the output.
bc_feature_sets = [
    ("Original features", X_bc_train, X_bc_test),
    ("PCA features", X_bc_pca_train, X_bc_pca_test),
    ("LDA features", X_bc_lda_train, X_bc_lda_test),
]
for bc_label, bc_train, bc_test in bc_feature_sets:
    print(f"\n=== {bc_label} ===")
    apply_classifiers(bc_train, bc_test, y_bc_train, y_bc_test)


Breast Cancer Dataset:

Metrics for KNN:
Accuracy: 0.9473684210526315
Precision: 0.9473684210526315
Recall: 0.9473684210526315
F1 Score: 0.9473684210526315

Metrics for Decision Tree:
Accuracy: 0.9473684210526315
Precision: 0.9473684210526315
Recall: 0.9473684210526315
F1 Score: 0.9473684210526315

Metrics for Random Forest:
Accuracy: 0.956140350877193
Precision: 0.9560881370091896
Recall: 0.956140350877193
F1 Score: 0.9560357083576897

Metrics for KNN:
Accuracy: 0.9736842105263158
Precision: 0.9739576213260425
Recall: 0.9736842105263158
F1 Score: 0.973742132293316

Metrics for Decision Tree:
Accuracy: 0.9736842105263158
Precision: 0.97475106685633
Recall: 0.9736842105263158
F1 Score: 0.9734808562744625

Metrics for Random Forest:
Accuracy: 0.9736842105263158
Precision: 0.9737190197716513
Recall: 0.9736842105263158
F1 Score: 0.9736214250146138

Metrics for KNN:
Accuracy: 0.9736842105263158
Precision: 0.9739576213260425
Recall: 0.9736842105263158
F1 Score: 0.973742132293316

Metrics fo

In [None]:
# Iris dataset: standardise the features, then build the PCA (2-D) and
# LDA (1-D) representations from the standardised matrix.
# NOTE(review): the scaler, PCA and LDA are all fitted on the full dataset,
# i.e. before the later train_test_split, so held-out rows (and, for LDA,
# their labels) influence the transforms.
y_iris = iris_data.target
X_iris = StandardScaler().fit_transform(iris_data.data)
X_iris_pca = apply_pca(X_iris, n_components=2)
X_iris_lda = apply_lda(X_iris, y_iris, n_components=1)

Variance of coefficients: [0.72962445 0.22850762]


In [None]:
# Split all three feature representations and the labels in one call so that
# they share the same 80/20 partition (seed 42).
(
    X_iris_train, X_iris_test,
    X_iris_pca_train, X_iris_pca_test,
    X_iris_lda_train, X_iris_lda_test,
    y_iris_train, y_iris_test,
) = train_test_split(X_iris, X_iris_pca, X_iris_lda, y_iris, test_size=0.2, random_state=42)


In [None]:
print("\nIris Dataset:")
# Label each run so the three metric listings can be told apart in the output.
iris_feature_sets = [
    ("Original features", X_iris_train, X_iris_test),
    ("PCA features", X_iris_pca_train, X_iris_pca_test),
    ("LDA features", X_iris_lda_train, X_iris_lda_test),
]
for iris_label, iris_train, iris_test in iris_feature_sets:
    print(f"\n=== {iris_label} ===")
    apply_classifiers(iris_train, iris_test, y_iris_train, y_iris_test)



Iris Dataset:

Metrics for KNN:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Metrics for Decision Tree:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Metrics for Random Forest:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Metrics for KNN:
Accuracy: 0.9333333333333333
Precision: 0.9333333333333333
Recall: 0.9333333333333333
F1 Score: 0.9333333333333333

Metrics for Decision Tree:
Accuracy: 0.9
Precision: 0.9013888888888889
Recall: 0.9
F1 Score: 0.89923273657289

Metrics for Random Forest:
Accuracy: 0.9333333333333333
Precision: 0.9333333333333333
Recall: 0.9333333333333333
F1 Score: 0.9333333333333333

Metrics for KNN:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Metrics for Decision Tree:
Accuracy: 0.9666666666666667
Precision: 0.9700000000000001
Recall: 0.9666666666666667
F1 Score: 0.966750208855472

Metrics for Random Forest:
Accuracy: 0.9666666666666667
Precision: 0.9700000000000001
Recall: 0.9666666666666667
F1 Score: 0.966750208855472


In [None]:
# Diabetes Dataset
# load_diabetes() provides a continuous regression target. Feeding it straight
# to LDA and the classifiers treats almost every distinct value as its own
# class, which is why the scores come out at ~0. Turn it into a binary label
# (1 = above the median target, 0 = at or below) so the classification
# pipeline used for the other datasets is meaningful here too.
y_diabetes_continuous = diabetes_data.target
y_diabetes = (y_diabetes_continuous > np.median(y_diabetes_continuous)).astype(int)
X_diabetes = StandardScaler().fit_transform(diabetes_data.data)
X_diabetes_pca = apply_pca(X_diabetes, n_components=2)
# With two classes LDA can produce at most one discriminant component.
X_diabetes_lda = apply_lda(X_diabetes, y_diabetes, n_components=1)


Variance of coefficients: [0.40242108 0.14923197]


In [None]:
# Split all three feature representations and the labels in one call so that
# they share the same 80/20 partition (seed 42).
(
    X_diabetes_train, X_diabetes_test,
    X_diabetes_pca_train, X_diabetes_pca_test,
    X_diabetes_lda_train, X_diabetes_lda_test,
    y_diabetes_train, y_diabetes_test,
) = train_test_split(
    X_diabetes, X_diabetes_pca, X_diabetes_lda, y_diabetes,
    test_size=0.2, random_state=42,
)


In [None]:
print("\nDiabetes Dataset:")
# Label each run so the three metric listings can be told apart in the output.
diabetes_feature_sets = [
    ("Original features", X_diabetes_train, X_diabetes_test),
    ("PCA features", X_diabetes_pca_train, X_diabetes_pca_test),
    ("LDA features", X_diabetes_lda_train, X_diabetes_lda_test),
]
for diabetes_label, diabetes_train, diabetes_test in diabetes_feature_sets:
    print(f"\n=== {diabetes_label} ===")
    apply_classifiers(diabetes_train, diabetes_test, y_diabetes_train, y_diabetes_test)


Diabetes Dataset:

Metrics for KNN:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0

Metrics for Decision Tree:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Metrics for Random Forest:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0

Metrics for KNN:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0

Metrics for Decision Tree:
Accuracy: 0.011235955056179775
Precision: 0.011235955056179775
Recall: 0.011235955056179775
F1 Score: 0.011235955056179775


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Metrics for Random Forest:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0

Metrics for KNN:
Accuracy: 0.011235955056179775
Precision: 0.0044943820224719105
Recall: 0.011235955056179775
F1 Score: 0.006420545746388444

Metrics for Decision Tree:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Metrics for Random Forest:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
