# In [2]:
import pandas as pd
import numpy as np

# Location of the letter-recognition data file on the UCI archive.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "letter-recognition/letter-recognition.data"
)
# header=None: treat the first line as data and let pandas number the columns.
df = pd.read_csv(url, header=None)

# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve

# Load the dataset.  header=None reads every row as data; below, column 0 is
# used as the class label and the remaining columns as the features.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"
data = pd.read_csv(url, header=None)
features = data.iloc[:, 1:]
labels = data.iloc[:, 0]

# Number of independent random train/test splits ("samples") to evaluate
n_samples = 10

# Number of cross-validation folds, shared by the grid search and the
# learning curve so the score arrays below have a known last dimension
cv_folds = 10

# Fractions of the training split at which the learning curve is evaluated
size_fractions = np.linspace(0.1, 1.0, 10)
n_ticks = len(size_fractions)

# Define the parameter grid to search over
# NOTE(review): per the SVC docs gamma only applies to the rbf/poly/sigmoid
# kernels, so each linear candidate is effectively evaluated twice here.
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': ['scale', 'auto']}

# Initialize arrays to store the results.
# learning_curve returns, per call, the absolute training sizes with shape
# (n_ticks,) and score matrices with shape (n_ticks, n_cv_folds); each sample
# therefore needs a full row / matrix of storage, not a single element.
accuracies = np.zeros(n_samples)
train_sizes = np.zeros((n_samples, n_ticks), dtype=int)
train_scores = np.zeros((n_samples, n_ticks, cv_folds))
test_scores = np.zeros((n_samples, n_ticks, cv_folds))


# Loop over the samples
for i in range(n_samples):
    # Split the data into training and testing sets; random_state=i makes
    # each sample a different but reproducible 70/30 split
    train_data, test_data, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.3, random_state=i
    )

    # Initialize the SVM and the GridSearchCV object
    svm = SVC()
    grid_search = GridSearchCV(svm, param_grid, cv=cv_folds)

    # Fit the GridSearchCV object to the training data
    grid_search.fit(train_data, train_labels)

    # Get the best hyperparameters and the corresponding accuracy.
    # GridSearchCV refits the best configuration on the whole training split
    # by default (refit=True), so best_estimator_ is already the fitted model
    # and no second SVC has to be built and trained by hand.
    best_params = grid_search.best_params_
    best_svm = grid_search.best_estimator_
    accuracies[i] = best_svm.score(test_data, test_labels)

    # Calculate the learning curve (learning_curve clones the estimator, so
    # passing the already-fitted model is fine)
    train_sizes[i], train_scores[i], test_scores[i] = learning_curve(
        best_svm,
        train_data,
        train_labels,
        cv=cv_folds,
        scoring='accuracy',
        train_sizes=size_fractions,
    )

# Find the sample with the maximum accuracy
max_index = np.argmax(accuracies)

# Per-tick mean and spread across the CV folds (axis=1) for that sample
best_sizes = train_sizes[max_index]
train_mean = train_scores[max_index].mean(axis=1)
train_std = train_scores[max_index].std(axis=1)
test_mean = test_scores[max_index].mean(axis=1)
test_std = test_scores[max_index].std(axis=1)

# Plot the learning curve for the sample with the maximum accuracy
plt.figure()
plt.title('Learning Curve (SVM)')
plt.xlabel('Training Set Size')
plt.ylabel('Accuracy')
plt.ylim(0.7, 1.0)
# Shaded bands show +/- one standard deviation across the folds
plt.fill_between(best_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color='r')
plt.fill_between(best_sizes, test_mean - test_std, test_mean + test_std, alpha=0.1, color='g')
plt.plot(best_sizes, train_mean, 'o-', color='r', label='Training Score')
plt.plot(best_sizes, test_mean, 'o-', color='g', label='Cross-Validation Score')
plt.legend(loc='best')
plt.show()
