In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score



In [2]:
# Load the training data and take a first look at it.
data = pd.read_csv('train.csv')

# Preview the first rows and the per-column summary statistics.
print(data.head())
print(data.describe())

# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() appends a stray "None".
data.info()

# Count missing values per column.
print(data.isnull().sum())

   battery_power  blue  clock_speed  dual_sim  fc  four_g  int_memory  m_dep  \
0            842     0          2.2         0   1       0           7    0.6   
1           1021     1          0.5         1   0       1          53    0.7   
2            563     1          0.5         1   2       1          41    0.9   
3            615     1          2.5         0   0       0          10    0.8   
4           1821     1          1.2         0  13       1          44    0.6   

   mobile_wt  n_cores  ...  px_height  px_width   ram  sc_h  sc_w  talk_time  \
0        188        2  ...         20       756  2549     9     7         19   
1        136        3  ...        905      1988  2631    17     3          7   
2        145        5  ...       1263      1716  2603    11     2          9   
3        131        6  ...       1216      1786  2769    16     8         11   
4        141        2  ...       1208      1212  1411     8     2         15   

   three_g  touch_screen  wifi  price_

In [3]:
# Separate the predictors from the label.
# Precondition: `data` must contain a 'price_range' column holding the class
# labels. If it is missing, pandas raises a KeyError on the lines below, so
# add the labels to `data` before running this cell.

# Features: every column except the label.
X = data.drop('price_range', axis='columns')

# Target: the label column on its own.
y = data.loc[:, 'price_range']

In [4]:
# Step 1: Preprocess the Data

# Impute: replace any missing entry with the mean of its own column.
# (No gaps are expected in this dataset, so this is only a safeguard.)
X.fillna(value=X.mean(axis=0), inplace=True)

# Standardize: learn the per-feature scaling from X, then apply it to X.
# NOTE(review): the scaler is fitted on every row of X, before any
# train/test split takes place.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [5]:
# Step 2: Split the data into training and testing sets (80% train, 20% test)
# The split is taken from the unscaled feature frame X rather than X_scaled:
# X_scaled was standardized using statistics from every row, so evaluating on
# it would let the held-out rows influence the preprocessing (data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling from the training rows only, then apply it to both parts.
split_scaler = StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)


# Step 3: Train and evaluate SVM models with different kernels
def fit_and_report(model, kernel_label):
    """Fit `model` on the training split and report its test accuracy.

    Parameters
    ----------
    model : estimator exposing fit() and predict()
        The classifier to train (here an SVC instance).
    kernel_label : str
        Kernel name inserted into the printed message.

    Returns
    -------
    tuple
        (predictions for X_test, accuracy of those predictions against y_test)
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy with {kernel_label} Kernel: {accuracy * 100:.2f}%")
    return predictions, accuracy


# i. Linear Kernel SVM
svm_linear = SVC(kernel='linear', random_state=42)
y_pred_linear, accuracy_linear = fit_and_report(svm_linear, "Linear")

# ii. Polynomial Kernel SVM (degree 3)
svm_poly = SVC(kernel='poly', degree=3, random_state=42)
y_pred_poly, accuracy_poly = fit_and_report(svm_poly, "Polynomial")


Accuracy with Linear Kernel: 97.00%
Accuracy with Polynomial Kernel: 78.75%
