In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [2]:
# Load the dataset
df = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Display the first few rows of the dataset
print(df.head())

   battery_power  blue  clock_speed  dual_sim    fc  four_g  int_memory  \
0            842     0          2.2         0   1.0     0.0         7.0   
1           1021     1          0.5         1   0.0     1.0        53.0   
2            563     1          0.5         1   2.0     1.0        41.0   
3            615     1          2.5         0   0.0     0.0        10.0   
4           1821     1          1.2         0  13.0     1.0        44.0   

   m_dep  mobile_wt  n_cores  ...  px_height  px_width     ram  sc_h  sc_w  \
0    0.6      188.0      2.0  ...       20.0     756.0  2549.0   9.0   7.0   
1    0.7      136.0      3.0  ...      905.0    1988.0  2631.0  17.0   3.0   
2    0.9      145.0      5.0  ...     1263.0    1716.0  2603.0  11.0   2.0   
3    0.8      131.0      6.0  ...     1216.0    1786.0  2769.0  16.0   8.0   
4    0.6      141.0      2.0  ...     1208.0    1212.0  1411.0   8.0   2.0   

   talk_time  three_g  touch_screen  wifi  price_range  
0         19        0  

In [3]:
# Check for missing values
print(df.isnull().sum())

df = df.fillna(0)

# Convert categorical variables to numerical
categorical_features = ['blue', 'dual_sim', 'four_g', 'three_g', 'touch_screen', 'wifi']
df[categorical_features] = df[categorical_features].astype(int)

# Feature scaling
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df.drop('price_range', axis=1))

# Create a DataFrame with the scaled features
scaled_df = pd.DataFrame(scaled_features, columns=df.columns[:-1])
scaled_df['price_range'] = df['price_range']


battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               5
four_g           5
int_memory       5
m_dep            5
mobile_wt        4
n_cores          4
pc               5
px_height        4
px_width         2
ram              2
sc_h             1
sc_w             1
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64


In [4]:
X = scaled_df.drop('price_range', axis=1)
y = scaled_df['price_range']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
# Initialize the model
model = SVC(decision_function_shape='ovo')

# Train the model
model.fit(X_train, y_train)


In [6]:
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.89
Confusion Matrix:
 [[ 97   8   0   0]
 [  6  80   5   0]
 [  0  13  75   4]
 [  0   0   8 104]]
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.92      0.93       105
           1       0.79      0.88      0.83        91
           2       0.85      0.82      0.83        92
           3       0.96      0.93      0.95       112

    accuracy                           0.89       400
   macro avg       0.89      0.89      0.89       400
weighted avg       0.89      0.89      0.89       400



In [8]:
def drop_col(data):
    for col in ["id"]: data.drop(col, axis=1, inplace=True)

    return data

# Check for missing values
print(test.isnull().sum())

test = drop_col(test)

test = test.fillna(0)

# Convert categorical variables to numerical
categorical_features = ['blue', 'dual_sim', 'four_g', 'three_g', 'touch_screen', 'wifi']
test[categorical_features] = test[categorical_features].astype(int)

# Feature scaling
scaler = StandardScaler()
scaled_features = scaler.fit_transform(test)

# Create a DataFrame with the scaled features
scaled_test = pd.DataFrame(scaled_features, columns=test.columns)
print(scaled_test)

id               0
battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
dtype: int64
     battery_power      blue  clock_speed  dual_sim        fc    four_g  \
0        -0.475451  0.968496     0.312601  0.966559  2.108676 -0.974329   
1        -0.942782  0.968496    -1.255832  0.966559 -0.132927  1.026347   
2         1.292077  0.968496     1.519087 -1.034598 -0.805408 -0.974329   
3         0.688249 -1.032529    -1.255832  0.966559  3.005317  1.026347   
4         0.429135 -1.032529    -0.169994 -1.034598  1.436195  1.026347   
..             ...       ...          ...       ...       ...       ...   
995       1.044531  0.968496     0.433249 -1.034598 -1.029568  

In [9]:
predict = model.predict(scaled_test)
print(predict)

[3 3 2 3 1 3 3 1 3 0 3 3 0 0 2 0 2 1 3 1 1 3 1 1 3 0 2 0 2 0 2 0 3 0 1 1 3
 2 2 1 1 2 0 0 0 1 0 3 1 2 1 0 2 0 3 1 3 1 0 3 3 2 0 1 0 1 2 3 1 2 1 2 2 3
 3 0 2 0 2 3 0 3 3 0 3 0 2 1 3 0 1 1 2 1 2 1 0 2 1 3 1 0 0 3 1 2 1 1 2 3 3
 2 1 3 3 3 3 2 3 0 0 3 2 1 2 1 3 2 2 1 0 2 1 1 3 1 1 0 3 2 1 2 1 2 2 3 3 2
 2 3 2 3 1 0 3 2 3 3 2 3 3 2 3 3 3 3 1 0 3 0 0 0 1 1 0 1 0 0 0 2 1 0 0 1 1
 2 2 1 0 0 0 1 1 3 1 0 2 2 2 3 1 1 3 3 2 2 1 1 0 0 1 3 0 2 3 3 0 2 0 3 2 3
 3 1 0 1 0 3 0 1 0 2 2 1 2 1 2 0 3 1 2 0 0 2 1 2 2 3 1 1 3 0 0 2 2 3 1 3 1
 1 3 2 1 2 3 3 3 1 0 1 2 3 1 1 3 2 0 3 0 1 2 0 0 3 2 3 3 2 1 3 3 2 3 1 2 1
 2 0 3 3 1 0 0 3 0 3 1 1 2 0 2 3 1 3 2 2 0 2 0 0 0 1 3 2 0 1 0 3 1 0 3 3 1
 2 3 2 3 1 3 3 2 2 2 2 3 0 3 0 3 1 3 1 2 3 0 1 1 3 1 3 2 3 0 0 1 0 1 0 0 2
 2 1 2 2 2 0 1 0 0 3 2 0 3 1 2 2 1 2 3 1 1 2 2 1 2 0 1 1 0 3 2 1 0 1 0 0 1
 1 0 0 0 2 2 3 1 3 0 2 0 3 0 1 1 1 2 0 3 2 3 3 1 3 2 2 1 2 2 0 1 1 1 2 0 0
 0 1 2 1 1 2 1 0 2 2 0 0 3 1 1 1 2 2 3 0 3 0 2 3 2 3 0 2 0 2 3 0 1 1 0 0 1
 1 1 2 3 3 1 3 1 2 2 2 3 

In [10]:
import pickle

# Save the model to a file
with open('/content/drive/MyDrive/Colab Notebooks/model.pkl', 'wb') as file:
    pickle.dump(model, file)
