In [35]:
from sklearn.datasets import make_classification
import pandas as pd

# Generate synthetic data
X, y = make_classification(
    n_samples=1000,  # Number of samples
    n_features=10,   # Number of features
    n_informative=8, # Number of informative features
    n_redundant=2,   # Number of redundant features
    n_classes=2,     # Number of classes (binary classification)
    random_state=42   # Set a random seed for reproducibility
)

# Convert the arrays into a DataFrame for better visualization
columns = [f"feature_{i}" for i in range(X.shape[1])]
df = pd.DataFrame(data=X, columns=columns)
df['target'] = y

# Display the first few rows of the DataFrame
print(df.head())


   feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
0  -0.224515  -0.648598   2.805591   0.569525   2.676771   0.685312   
1   1.038548  -1.316803   2.303387   1.233593   4.972675   1.086905   
2   1.148988   1.794382   2.835693  -1.114858  -1.247120  -1.899268   
3  -1.177318   2.420294  -0.363514  -1.086646  -2.824750  -2.722123   
4   1.346717   0.089373   2.056613  -0.428365   0.285387   1.140716   

   feature_6  feature_7  feature_8  feature_9  target  
0   0.306675   1.147291  -2.145905   2.477879       1  
1  -3.548788   0.342810  -0.469005   2.787109       1  
2  -0.381470   0.837209   0.163223  -0.997959       1  
3  -0.225266   0.515079  -3.317272  -2.801517       1  
4   0.721945   1.110152   2.426608   0.880496       0  


In [36]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply LDA
lda = LinearDiscriminantAnalysis()
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

# Train a classifier on the LDA-transformed data (for example, a simple logistic regression)
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier.fit(X_train_lda, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test_lda)

# Evaluate the performance
accuracy_LDA = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print("Accuracy with LDA:", accuracy_LDA)
print("\nClassification Report:\n", classification_rep)


Accuracy with LDA: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [39]:
from sklearn.decomposition import PCA
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load a sample dataset (e.g., digits dataset)
data = load_digits()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 1. Apply PCA
pca = PCA(n_components=10)  # Set the number of components (adjust as needed)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# 2. Train a classifier (e.g., RandomForest) on the PCA-transformed data
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train_pca, y_train)

# 3. Make predictions on the test set
y_pred = rf_classifier.predict(X_test_pca)

# 4. Evaluate the performance
accuracy_PCA = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print("Test Accuracy with PCA:", accuracy_PCA)
print("\nClassification Report:\n", classification_rep)


Test Accuracy with PCA: 0.9611111111111111

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99        33
           1       0.90      1.00      0.95        28
           2       1.00      1.00      1.00        33
           3       1.00      0.94      0.97        34
           4       1.00      1.00      1.00        46
           5       0.94      0.94      0.94        47
           6       1.00      0.97      0.99        35
           7       1.00      0.97      0.99        34
           8       0.93      0.87      0.90        30
           9       0.88      0.93      0.90        40

    accuracy                           0.96       360
   macro avg       0.96      0.96      0.96       360
weighted avg       0.96      0.96      0.96       360



In [40]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the SVM classifier
svm_classifier = SVC()

# Define the hyperparameter grid to search
param_grid = {
    'C': [0.1, 1, 10],             # Regularization parameter
    'kernel': ['linear', 'rbf'],   # Kernel type
    'gamma': ['scale', 'auto']     # Kernel coefficient for 'rbf' kernel
}

# Instantiate the GridSearchCV object
grid_search = GridSearchCV(svm_classifier, param_grid, scoring='accuracy', cv=5)

# Fit the GridSearchCV object to the data
grid_search.fit(X_train, y_train)

# Get the best parameters and best estimator from the grid search
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Make predictions on the test set using the best model
y_pred = best_model.predict(X_test)

# Evaluate the performance of the best model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print("Best Parameters using GridSearchCV:", best_params)
print("\nTest Accuracy with GridSearch:", accuracy)
print("\nClassification Report:\n", classification_rep)


Best Parameters using GridSearchCV: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}

Test Accuracy with GridSearch: 0.9861111111111112

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      0.97      0.99        34
           4       1.00      1.00      1.00        46
           5       0.96      0.98      0.97        47
           6       0.97      1.00      0.99        35
           7       0.97      0.97      0.97        34
           8       1.00      0.97      0.98        30
           9       0.97      0.97      0.97        40

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360



In [41]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.svm import SVC
from scipy.stats import reciprocal, uniform
from sklearn.metrics import accuracy_score, classification_report

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the SVM classifier
svm_classifier = SVC()

# Define the hyperparameter distributions to sample from
param_distributions = {
    'C': reciprocal(0.1, 10),       # Reciprocal distribution for 'C'
    'kernel': ['linear', 'rbf'],     # Choice of 'linear' or 'rbf' kernel
    'gamma': uniform(0.1, 1.0)       # Uniform distribution for 'gamma'
}

# Instantiate the RandomizedSearchCV object
random_search = RandomizedSearchCV(svm_classifier, param_distributions, n_iter=10, scoring='accuracy', cv=5, random_state=42)

# Fit the RandomizedSearchCV object to the data
random_search.fit(X_train, y_train)

# Get the best parameters and best estimator from the random search
best_params_random = random_search.best_params_
best_model_random = random_search.best_estimator_

# Make predictions on the test set using the best model
y_pred_random = best_model_random.predict(X_test)

# Evaluate the performance of the best model from random search
accuracy_random = accuracy_score(y_test, y_pred_random)
classification_rep_random = classification_report(y_test, y_pred_random)

# Print the results
print("Best Parameters using RandomizedSearchCV:", best_params_random)
print("\nTest Accuracy with Best Model (Randomized):", accuracy_random)
print("\nClassification Report (Randomized):\n", classification_rep_random)


Best Parameters using RandomizedSearchCV: {'C': 0.5611516415334504, 'gamma': 1.0507143064099163, 'kernel': 'linear'}

Test Accuracy with Best Model (Randomized): 0.9777777777777777

Classification Report (Randomized):
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.97      1.00      0.98        28
           2       1.00      1.00      1.00        33
           3       0.97      0.94      0.96        34
           4       0.98      0.98      0.98        46
           5       0.96      1.00      0.98        47
           6       1.00      1.00      1.00        35
           7       0.97      0.97      0.97        34
           8       1.00      0.97      0.98        30
           9       0.95      0.93      0.94        40

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



In [42]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 1. Apply Feature Scaling (Standardization)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. Define the SVM classifier
svm_classifier = SVC()

# 3. Train the classifier on the scaled data
svm_classifier.fit(X_train_scaled, y_train)

# 4. Make predictions on the test set
y_pred = svm_classifier.predict(X_test_scaled)

# 5. Evaluate the performance
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print("Test Accuracy with Feature Scaling:", accuracy)
print("\nClassification Report with Feature Scaling:\n", classification_rep)


Test Accuracy with Feature Scaling: 0.9805555555555555

Classification Report with Feature Scaling:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      0.97      0.99        34
           4       0.96      1.00      0.98        46
           5       0.96      0.98      0.97        47
           6       0.97      1.00      0.99        35
           7       1.00      0.94      0.97        34
           8       0.97      0.97      0.97        30
           9       0.97      0.95      0.96        40

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



In [43]:
from sklearn.datasets import load_digits
from sklearn.model_selection import cross_val_score, KFold
from sklearn.svm import SVC

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# 1. Define the SVM classifier
svm_classifier = SVC()

# 2. Perform k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)  # Choose the number of folds (e.g., 5)

# Perform cross-validation and get accuracy scores
cross_val_scores = cross_val_score(svm_classifier, X, y, cv=kf)

# 3. Print the cross-validation scores
print("Cross-Validation Scores:", cross_val_scores)

# 4. Print the mean accuracy
mean_accuracy = cross_val_scores.mean()
print("Mean Cross-Validation Accuracy:", mean_accuracy)


Cross-Validation Scores: [0.98611111 0.98888889 0.98607242 0.98885794 0.98885794]
Mean Cross-Validation Accuracy: 0.987757660167131


In [44]:
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Define the SVM classifier
svm_classifier = SVC()

# Specify the number of folds (e.g., 5)
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize a variable to store the cross-validation scores
cross_val_scores = []

# Iterate through each fold
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for this fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the model on the training set
    svm_classifier.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = svm_classifier.predict(X_test)
    
    # Calculate and store the accuracy for this fold
    fold_accuracy = accuracy_score(y_test, y_pred)
    cross_val_scores.append(fold_accuracy)

# Print the accuracy for each fold
print("Cross-Validation Scores:", cross_val_scores)

# Calculate and print the mean accuracy
mean_accuracy = sum(cross_val_scores) / len(cross_val_scores)
print("Mean Cross-Validation Accuracy:", mean_accuracy)


Cross-Validation Scores: [0.9861111111111112, 0.9888888888888889, 0.9860724233983287, 0.9888579387186629, 0.9888579387186629]
Mean Cross-Validation Accuracy: 0.987757660167131


In [45]:
from sklearn.datasets import load_digits
from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Define the SVM classifier
svm_classifier = SVC()

# Use LeaveOneOut for LOOCV
loo = LeaveOneOut()

# Initialize a variable to store the LOOCV scores
loocv_scores = []

# Iterate through each fold (leave one out)
for train_index, test_index in loo.split(X):
    # Split the data into training and testing sets for this iteration
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the model on the training set
    svm_classifier.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = svm_classifier.predict(X_test)
    
    # Calculate and store the accuracy for this iteration
    iteration_accuracy = accuracy_score(y_test, y_pred)
    loocv_scores.append(iteration_accuracy)

# Print the accuracy for each iteration
print("LOOCV Scores:", loocv_scores)

# Calculate and print the mean accuracy
mean_accuracy_loocv = sum(loocv_scores) / len(loocv_scores)
print("Mean LOOCV Accuracy:", mean_accuracy_loocv)


LOOCV Scores: [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 

In [46]:
print("Mean LOOCV Accuracy:", mean_accuracy_loocv)
print("Mean Cross-Validation Accuracy:", mean_accuracy)
print("Test Accuracy with Feature Scaling:", accuracy)
print("\nTest Accuracy with Best Model (Randomized):", accuracy_random)
print("\nTest Accuracy with GridSearch:", accuracy)
print("Test Accuracy with PCA:", accuracy_PCA)
print("Accuracy with LDA:", accuracy_LDA)

Mean LOOCV Accuracy: 0.988313856427379
Mean Cross-Validation Accuracy: 0.987757660167131
Test Accuracy with Feature Scaling: 0.9805555555555555

Test Accuracy with Best Model (Randomized): 0.9777777777777777

Test Accuracy with GridSearch: 0.9805555555555555
Test Accuracy with PCA: 0.9611111111111111
Accuracy with LDA: 1.0
