<a href="https://colab.research.google.com/github/P-shettar/ML-Lab/blob/main/support_vector_machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Support Vector Machine on smartphones.csv
# Objective: Train and evaluate Linear and Kernel (RBF) SVMs and compare performance

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 1. Load dataset
# The CSV is addressed by a bare relative file name, so it has to sit in the
# directory the notebook is executed from.
df = pd.read_csv("smartphones.csv")

# Quick sanity check: dimensions and column names of the raw table.
print("Shape of data:", df.shape)
print("Columns:", list(df.columns))

# 2. Choose target and features
# The label is the "5G_or_not" column (whether the phone supports 5G).
y = df["5G_or_not"]

# Every remaining column is a feature, except 'model', which is removed too:
# its values are almost unique per row, so it is not useful as a feature.
X = df.drop(["5G_or_not", "model"], axis=1)

# 3. Train-test split (row positions only)
# The split is decided BEFORE any statistics are computed, so the fill values
# below can be learned from the training rows alone (no test-set leakage).
# Positions are split instead of the frame itself so that the imputed and
# encoded X can be sliced afterwards with exactly this partition.
train_rows, test_rows = train_test_split(
    list(range(len(X))), test_size=0.3, random_state=42, stratify=y
)

# 4. Handle missing values
# For numeric columns -> fill with the TRAINING median
# For all other columns -> fill with the TRAINING mode
fill_values = {}
for col in X.columns:
    if not X[col].isna().any():
        continue  # nothing to fill in this column
    train_col = X[col].iloc[train_rows]
    if pd.api.types.is_numeric_dtype(train_col):
        fill = train_col.median()
    else:
        # Covers object, string and categorical columns alike.
        modes = train_col.mode()
        if modes.empty:
            continue  # column has no observed value in the training rows
        fill = modes.iloc[0]
    if pd.isna(fill):
        continue  # no usable statistic (column entirely missing in training rows)
    fill_values[col] = fill

X = X.fillna(value=fill_values)

# 5. Convert categorical columns to numeric using one-hot encoding
# Encoding the whole frame at once guarantees that the train and test parts
# end up with the same dummy columns in the same order.
X = pd.get_dummies(X, drop_first=True)

print("Shape after one-hot encoding:", X.shape)

# Materialise the split decided in step 3.
X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# 6. Feature scaling (very important for SVM)
# The scaler is fitted on the training split only; the test split is then
# transformed with those same fitted parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 7. Train Linear SVM
# Fit a linear-kernel classifier on the scaled training data, then predict
# labels for the scaled test data.
svm_linear = SVC(kernel="linear", random_state=42).fit(X_train_scaled, y_train)
y_pred_linear = svm_linear.predict(X_test_scaled)

# 8. Train RBF Kernel SVM
# Same procedure as the linear model, but with an RBF kernel and
# gamma="scale".
svm_rbf = SVC(kernel="rbf", gamma="scale", random_state=42).fit(
    X_train_scaled, y_train
)
y_pred_rbf = svm_rbf.predict(X_test_scaled)

# 9. Evaluate and compare

def _print_evaluation(banner, y_true, y_pred):
    """Print a banner followed by accuracy, confusion matrix and
    classification report for one set of predictions."""
    print(banner)
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("Classification Report:\n", classification_report(y_true, y_pred))


# Both models are scored against the same held-out labels.
_print_evaluation("========== Linear SVM ==========", y_test, y_pred_linear)
_print_evaluation("\n========== RBF Kernel SVM ==========", y_test, y_pred_rbf)


Shape of data: (980, 22)
Columns: ['brand_name', 'model', 'price', 'avg_rating', '5G_or_not', 'processor_brand', 'num_cores', 'processor_speed', 'battery_capacity', 'fast_charging_available', 'fast_charging', 'ram_capacity', 'internal_memory', 'screen_size', 'refresh_rate', 'num_rear_cameras', 'os', 'primary_camera_rear', 'primary_camera_front', 'extended_memory_available', 'resolution_height', 'resolution_width']
Shape after one-hot encoding: (980, 76)
Accuracy: 0.9013605442176871
Confusion Matrix:
 [[116  13]
 [ 16 149]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.90      0.89       129
           1       0.92      0.90      0.91       165

    accuracy                           0.90       294
   macro avg       0.90      0.90      0.90       294
weighted avg       0.90      0.90      0.90       294


Accuracy: 0.9387755102040817
Confusion Matrix:
 [[116  13]
 [  5 160]]
Classification Report:
               precision 