In [None]:
import pandas as pd

# UCI red-wine quality table. The file uses ';' as its field separator, so the
# delimiter has to be given explicitly.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-red.csv"
)
df = pd.read_csv(url, delimiter=";")

# Leave the frame as the cell's last expression so the notebook renders it.
df

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Draw ten independent 70/30 partitions of the wine table. Each partition is
# seeded with its loop index, so re-running the cell reproduces the same splits.
for i in range(10):
    train, test = train_test_split(
        df,
        random_state=i,
        test_size=0.3,
    )
    print(f"Sample {i+1}: Training data shape: {train.shape}, Testing data shape: {test.shape}")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Read the semicolon-separated red-wine quality CSV.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv',
    delimiter=';',
)

# 'quality' is the label; every other column is used as a feature.
y = df['quality']
X = df.drop(columns='quality')

# Ten seeded 70/30 splits, one default-parameter SVC per split.
for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=i
    )

    # fit() returns the estimator itself, so construction and training chain.
    svm = SVC().fit(X_train, y_train)

    # score() is evaluated on the held-out part of this split.
    accuracy = svm.score(X_test, y_test)
    print(f"Sample {i+1}: Accuracy = {accuracy:.4f}")


In [None]:
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

# Load the red-wine quality data (semicolon-separated CSV).
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', sep=';')

# Split into features and target variable
X = df.drop('quality', axis=1)
y = df['quality']

# Base classifier; max_iter=1000 caps the solver iterations of every fit.
svm = SVC(max_iter=1000)

# One sub-grid per kernel family, varying only the parameters that kernel uses:
# 'gamma' applies to rbf/poly/sigmoid and 'degree' only to poly. A single flat
# grid would refit identical models (e.g. a linear kernel for every
# gamma/degree pair), which costs time without adding information.
param_grid = [
    {'C': [0.1, 1, 10], 'kernel': ['linear']},
    {'C': [0.1, 1, 10], 'kernel': ['rbf', 'sigmoid'], 'gamma': ['scale', 'auto']},
    {'C': [0.1, 1, 10], 'kernel': ['poly'], 'gamma': ['scale', 'auto'], 'degree': [2, 3]},
]

# Best kernel found for each sample, keyed by the sample's display name.
best_kernels = {}

# Perform grid search with 5-fold cross-validation for each sample
for i in range(10):
    # Sample i is the 70/30 split seeded with i.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    svm_cv = GridSearchCV(svm, param_grid, cv=5, verbose=1)
    svm_cv.fit(X_train, y_train)

    # Every sub-grid sets 'kernel', so the key is always present.
    best_kernels[f'Sample {i+1}'] = svm_cv.best_params_['kernel']

    # Print the best parameters and mean cross-validation score for this sample
    print(f"Sample {i+1}: Best parameters: {svm_cv.best_params_}, CV score: {svm_cv.best_score_}")

# One row per sample, single 'Best Kernel' column.
best_kernels_df = pd.DataFrame.from_dict(best_kernels, orient='index', columns=['Best Kernel'])

print(best_kernels_df)


In [None]:
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.svm import SVR
from scipy.stats import uniform

# Load the red-wine quality data (semicolon-separated CSV).
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', sep=';')

# Split into features and target variable
X = df.drop('quality', axis=1)
y = df['quality']

# Base regressor; max_iter=1000 caps the solver iterations of every fit.
svm = SVR(max_iter=1000)

# scipy's uniform(loc, scale) spans [loc, loc + scale], so C is drawn from
# [0.1, 100.1] and epsilon from [0.01, 1.01].
param_dist = {'C': uniform(0.1, 100), 'kernel': ['linear', 'rbf', 'poly', 'sigmoid'], 'gamma': ['scale', 'auto'], 'epsilon': uniform(0.01, 1)}

# Perform randomized search with 5-fold cross-validation for each sample
for i in range(10):
    # Sample i is the 70/30 split seeded with i.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    # Seed the search as well: without random_state the 100 sampled candidates
    # (and therefore the reported best parameters) change on every run.
    svm_rs = RandomizedSearchCV(svm, param_dist, cv=5, n_iter=100, verbose=1, random_state=i)
    svm_rs.fit(X_train, y_train)

    # Print the best parameters and mean cross-validation score for each sample
    print(f"Sample {i+1}: Best parameters: {svm_rs.best_params_}, CV score: {svm_rs.best_score_}")

    # Score the refitted best model on the held-out part of this split
    # (R^2 for scikit-learn regressors).
    best_model = svm_rs.best_estimator_
    test_score = best_model.score(X_test, y_test)
    print(f"Test score: {test_score}")

    # Mean signed residual (actual - predicted) on the test set.
    # NOTE(review): positive and negative errors cancel in this mean; confirm
    # that a signed average is what "epsilon deviation" is meant to report.
    y_pred = best_model.predict(X_test)
    eps_dev = y_test - y_pred
    print(f"Epsilon deviation: {eps_dev.mean()}")


In [None]:
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import NuSVR

# Read the semicolon-separated red-wine quality CSV.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv',
    sep=';',
)

# 'quality' is the target; all remaining columns are predictors.
y = df['quality']
X = df.drop(columns='quality')

# Search space: every combination of kernel, gamma setting and polynomial
# degree. No 'nu' entry is listed, so the estimator's own nu is left untouched.
param_grid = dict(
    kernel=['linear', 'rbf', 'poly', 'sigmoid'],
    gamma=['scale', 'auto'],
    degree=[2, 3],
)

# Define function to calculate best hyperparameters for each sample
def optimize_hyperparameters(X, y, param_grid, model, n_samples=10, test_size=0.3, cv=5):
    """Grid-search ``model`` on several seeded train/test splits of ``(X, y)``.

    Sample ``i`` is the split produced with ``random_state=i``; the search is
    fitted on the training part of that split only.

    Parameters
    ----------
    X, y : features and target, passed straight to ``train_test_split``.
    param_grid : grid handed to ``GridSearchCV``.
    model : estimator to tune.
    n_samples : number of splits to evaluate (default 10).
    test_size : held-out fraction of each split (default 0.3).
    cv : number of cross-validation folds (default 5).

    Returns
    -------
    list of ``[kernel, nu, epsilon]`` rows, one per sample. The values are the
    parameters of the refitted best estimator; ``'-'`` marks a parameter the
    estimator does not have.

    Raises
    ------
    Any exception from a failing fit propagates, because the search is run
    with ``error_score='raise'``.
    """
    results = []
    for i in range(n_samples):
        # Only the training part is needed here; the test part is unused.
        X_train, _, y_train, _ = train_test_split(X, y, test_size=test_size, random_state=i)
        svm_cv = GridSearchCV(model, param_grid, cv=cv, verbose=1, error_score='raise')
        svm_cv.fit(X_train, y_train)

        # best_params_ only holds the keys that were searched, so reading
        # 'nu'/'epsilon' from it yields '-' whenever they are not in the grid.
        # The refitted estimator exposes the values that were actually used.
        effective = svm_cv.best_estimator_.get_params()
        results.append([effective['kernel'], effective.get('nu', '-'), effective.get('epsilon', '-')])
    return results

# Tune a default-constructed NuSVR on each sample; every returned row is
# [kernel, nu, epsilon].
results = optimize_hyperparameters(X, y, param_grid, NuSVR())

# Tabulate the rows, one per sample, under readable column names.
results_df = pd.DataFrame(
    data=results,
    columns=['Best kernel', 'Nu', 'Epsilon'],
)

# Show the table as plain text.
print(results_df)


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import NuSVC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# The abalone file has no header row, so columns are addressed by position.
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data',
    header=None,
)

# Column 0 holds the sex as text; replace it with integer codes.
data[0] = pd.factorize(data[0])[0]

# Last column is the target, everything before it is a feature.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Fixed 80/20 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Search space for the NuSVC grid search.
param_grid = dict(
    kernel=['linear', 'rbf', 'sigmoid', 'poly'],
    nu=[0.01, 0.05, 0.1, 0.5, 0.9],
    degree=[2, 3, 4],
)

# 5-fold cross-validated grid search over a default NuSVC.
grid = GridSearchCV(estimator=NuSVC(), param_grid=param_grid, cv=5)

# Number of random train/test resamples ("samples") to evaluate.
N_SAMPLES = 10

results = []

# Each sample is a fresh 80/20 split of the full data set, seeded by its index.
# Fitting the grid search on one training row at a time cannot work: a single
# row cannot be divided into 5 CV folds and contains only one class, so the
# very first fit raises.
# NOTE(review): NuSVC can reject a nu value as infeasible when class sizes are
# very unequal. If the target contains rare classes, some or all grid entries
# may fail to fit — confirm the nu values suit this target.
for i in range(N_SAMPLES):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=i)

    # Tune on this sample's training part only.
    grid.fit(X_tr, y_tr)

    # Best model, refitted by GridSearchCV on the whole training part.
    best_svm = grid.best_estimator_

    # Accuracy on this sample's held-out part.
    y_pred = best_svm.predict(X_te)
    acc = accuracy_score(y_te, y_pred)

    # One record per sample; the keys become the columns of the results table.
    results.append({
        'sample': i,
        'svm': best_svm,
        'accuracy': acc,
        'kernel': grid.best_params_['kernel'],
        'nu': grid.best_params_['nu'],
        'degree': grid.best_params_['degree']
    })

# Print the results in a table
results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

# Get the sample with the highest accuracy
max_acc_sample = max(results, key=lambda x: x['accuracy'])
top_svm = max_acc_sample['svm']

# A fitted NuSVC keeps no per-iteration history, so n_support_ and dual_coef_
# cannot be read as a convergence curve: n_support_ holds one count per class
# and dual_coef_ is a 2-D array with one column per support vector (plotting
# it directly draws one line per support vector). Show both as correctly
# labelled diagnostics of the fitted model instead.
fig, (ax_sv, ax_coef) = plt.subplots(1, 2, figsize=(12, 4))

# Support vectors contributed by each class.
ax_sv.bar([str(c) for c in top_svm.classes_], top_svm.n_support_)
ax_sv.set_xlabel('Class')
ax_sv.set_ylabel('Number of support vectors')
ax_sv.set_title('Support vectors per class')

# Distribution of all dual coefficients, flattened across the class rows.
ax_coef.hist(np.ravel(top_svm.dual_coef_), bins=30)
ax_coef.set_xlabel('Dual coefficient')
ax_coef.set_ylabel('Count')
ax_coef.set_title('Dual coefficient distribution')

fig.suptitle(f"Fitted-model diagnostics for sample {max_acc_sample['sample']} with {max_acc_sample['kernel']} kernel")
fig.tight_layout()
plt.show()
