In [2]:
# Install the required package
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [4]:


# Import necessary libraries
import pandas as pd
from ucimlrepo import fetch_ucirepo

# Retrieve dataset from UCI Machine Learning Repository (requires network access)
data_id = 17  # ID for the Breast Cancer Wisconsin (Diagnostic) dataset
dataset = fetch_ucirepo(id=data_id)

# Transform the dataset into pandas DataFrames
features = pd.DataFrame(dataset.data.features)
labels = pd.DataFrame(dataset.data.targets)

# Display dataset metadata for verification
print(dataset.metadata)

# Display information about variables in the dataset
print(dataset.variables)

# Rename feature columns for clarity.
# Select the names by role instead of a hard-coded row offset, so the rename
# stays correct even if the variable table gains, loses, or reorders its
# ID/target rows.
variable_info = dataset.variables
feature_names = variable_info.loc[variable_info['role'] == 'Feature', 'name'].to_numpy()

# Fail early with a readable message if the variable table and the feature
# frame disagree, rather than relying on the generic pandas length error.
if len(feature_names) != features.shape[1]:
    raise ValueError(
        f"Variable table lists {len(feature_names)} feature names, "
        f"but the feature frame has {features.shape[1]} columns"
    )
features.columns = feature_names

# Rename the target column for better understanding
# (a single-element list, so this expects exactly one target column)
labels.columns = ['Diagnosis']

# Display the first few rows to ensure data is loaded correctly
print(features.head())
print(labels.head())

# Convert the 'Diagnosis' column to a Series for further processing
labels = labels['Diagnosis']


{'uci_id': 17, 'name': 'Breast Cancer Wisconsin (Diagnostic)', 'repository_url': 'https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic', 'data_url': 'https://archive.ics.uci.edu/static/public/17/data.csv', 'abstract': 'Diagnostic Wisconsin Breast Cancer Database.', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 569, 'num_features': 30, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['Diagnosis'], 'index_col': ['ID'], 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1993, 'last_updated': 'Fri Nov 03 2023', 'dataset_doi': '10.24432/C5DW2B', 'creators': ['William Wolberg', 'Olvi Mangasarian', 'Nick Street', 'W. Street'], 'intro_paper': {'title': 'Nuclear feature extraction for breast tumor diagnosis', 'authors': 'W. Street, W. Wolberg, O. Mangasarian', 'published_in': 'Electronic imaging', 'year': 1993, 'url': 'https://www.semanticscholar.org/paper/53

In [6]:
# Import libraries for model training and evaluation
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=100,
)

# Build a linear-kernel Support Vector Machine and fit it on the training
# portion (`fit` returns the estimator itself, so the calls can be chained).
# NOTE(review): features are passed to the SVM unscaled — SVMs are generally
# scale-sensitive, so consider standardising the inputs; confirm before changing,
# as it will alter the reported accuracy.
# NOTE(review): `random_state` on SVC presumably has no effect unless
# probability estimates are enabled — verify against the scikit-learn docs.
svm_classifier = SVC(kernel='linear', random_state=100).fit(X_train, y_train)

# Score the held-out samples with the fitted classifier
predictions = svm_classifier.predict(X_test)

# Fraction of held-out samples whose predicted label matches the true label
model_accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Model Accuracy:", model_accuracy)

Model Accuracy: 0.956140350877193
