# Library

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Data Loading

In [2]:
# Placeholder tokens that should be read as missing values (NaN) when parsing the CSV.
missing_values = [
    "n/a",
    "na",
    "--",
    " ",
    "N/A",
    "NA",
    "unknown",
]

# Load the smartphone catalogue and preview its first rows.
df = pd.read_csv(filepath_or_buffer='smartphones.csv', na_values=missing_values)
df.head(5)

Unnamed: 0,model,price,rating,sim,processor,ram,battery,display,camera,card,os
0,OnePlus 11 5G,"₹54,999",89.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13
1,OnePlus Nord CE 2 Lite 5G,"₹19,989",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
2,Samsung Galaxy A14 5G,"₹16,499",75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13
3,Motorola Moto G62 5G,"₹14,999",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
4,Realme 10 Pro Plus,"₹24,999",82.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13


In [3]:
# Report the frame's dimensions: baris = number of rows, kolom = number of columns.
baris, kolom = df.shape
for label, jumlah in (('baris:', baris), ('kolom:', kolom)):
    print(label, jumlah)

baris: 1020
kolom: 11


In [4]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1020 entries, 0 to 1019
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   model      1020 non-null   object 
 1   price      1020 non-null   object 
 2   rating     879 non-null    float64
 3   sim        1020 non-null   object 
 4   processor  1020 non-null   object 
 5   ram        1020 non-null   object 
 6   battery    1020 non-null   object 
 7   display    1020 non-null   object 
 8   camera     1019 non-null   object 
 9   card       1013 non-null   object 
 10  os         1003 non-null   object 
dtypes: float64(1), object(10)
memory usage: 87.8+ KB


In [5]:
# Count the missing values in each column.
df.isna().sum()

model          0
price          0
rating       141
sim            0
processor      0
ram            0
battery        0
display        0
camera         1
card           7
os            17
dtype: int64

In [6]:
# Count rows that exactly repeat an earlier row (the first occurrence is not counted).
df.duplicated(keep='first').sum()

0

In [7]:
# Show the column labels currently present in the frame.
df.columns

Index(['model', 'price', 'rating', 'sim', 'processor', 'ram', 'battery',
       'display', 'camera', 'card', 'os'],
      dtype='object')

# Preprocessing & Cleaning

In [8]:
# Drop features that are not needed; here that is the `rating` column.
# Reassigning (instead of mutating in place) with errors='ignore' makes this
# cell safe to re-run: a second execution no longer raises KeyError because
# `rating` is already gone.
df = df.drop(columns=['rating'], errors='ignore')

In [9]:
# Remove, in place, every row that contains at least one missing value.
df.dropna(axis='index', how='any', inplace=True)

In [10]:
# Preview three random rows. The fixed random_state makes the preview
# reproducible, so a Restart & Run All displays the same rows every time.
df.sample(3, random_state=42)

Unnamed: 0,model,price,sim,processor,ram,battery,display,camera,card,os
18,OnePlus 10R 5G,"₹32,999","Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Dimensity 8100 Max, Octa Core, 2.85 GHz Processor","8 GB RAM, 128 GB inbuilt",5000 mAh Battery with 80W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,Android v12,Bluetooth
57,Xiaomi Redmi Note 10S,"₹10,999","Dual Sim, 3G, 4G, VoLTE, Wi-Fi, IR Blaster","Helio G95, Octa Core, 2.05 GHz Processor","6 GB RAM, 64 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.43 inches, 1080 x 2400 px Display with Punch...",64 MP Quad Rear & 13 MP Front Camera,"Memory Card Supported, upto 512 GB",Android v11
863,Vivo Y19,"₹20,000","Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Helio P65 , Octa Core, 2 GHz Processor","4 GB RAM, 128 GB inbuilt",5000 mAh Battery with 18W Fast Charging,"6.53 inches, 1080 x 2340 px Display with Water...",16 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card Supported, upto 256 GB",Android v9.0 (Pie)


In [11]:
import requests

# Fetch the latest INR-based exchange rates; they are needed to convert the
# prices from Indian rupees to Indonesian rupiah.
url = 'https://api.exchangerate-api.com/v4/latest/INR'
# A timeout keeps the notebook from hanging indefinitely if the service stalls.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of trying to parse an error body as rates.
response.raise_for_status()
data = response.json()

In [12]:
# IDR entry of the exchange-rate payload held in `data`.
kurs_rupiah = data['rates']['IDR']

# Convert only while `price` still holds the raw "₹54,999"-style strings, so
# re-running this cell is a no-op instead of failing on the `.str` accessor.
if not pd.api.types.is_numeric_dtype(df['price']):
    # Strip the currency symbol and thousands separators as literal text.
    harga_inr = (
        df['price']
        .str.replace('₹', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
    # Multiply by the rate, round to the nearest whole unit, and store as integers.
    df['price'] = (harga_inr * kurs_rupiah).round(0).astype(int)

In [13]:
# Preview three random rows. The fixed random_state makes the preview
# reproducible, so a Restart & Run All displays the same rows every time.
df.sample(3, random_state=42)

Unnamed: 0,model,price,sim,processor,ram,battery,display,camera,card,os
897,Tecno Spark 8T,1503172,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Helio G35, Octa Core, 2.3 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery,"6.6 inches, 720 x 1600 px Display with Water D...",50 MP + 2 MP Dual Rear & 8 MP Front Camera,Memory Card Supported,Android v11
529,Samsung Galaxy F14,2816921,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi, NFC",Octa Core Processor,"4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 25W Fast Charging,"6.62 inches, 2408 x 1080 px Display with Punch...",50 MP + 5 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13
782,Vivo Y12G (3GB RAM + 64GB),2253161,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Snapdragon 439 , Octa Core, 2 GHz Processor","3 GB RAM, 64 GB inbuilt",5000 mAh Battery,"6.51 inches, 720 x 1600 px Display with Water ...",13 MP + 2 MP + 2 MP Triple Rear & 8 MP Front C...,Memory Card Supported,Android v11


In [14]:
# Inspect the row(s) whose model name is exactly 'Samsung Galaxy M14'.
df.loc[df['model'].eq('Samsung Galaxy M14')]

Unnamed: 0,model,price,sim,processor,ram,battery,display,camera,card,os
830,Samsung Galaxy M14,2818612,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Exynos 1330, Octa Core Processor","4 GB RAM, 64 GB inbuilt",6000 mAh Battery,"6.5 inches, 720 x 1600 px Display with Water D...",48 MP + 8 MP + 2 MP Triple Rear & 8 MP Front C...,"Memory Card Supported, upto 512 GB",Android v13


In [15]:
# Summary statistics (count, unique, top, freq) for the object-dtype columns.
df.describe(include=[object])

Unnamed: 0,model,sim,processor,ram,battery,display,camera,card,os
count,1003,1003,1003,1003,1003,1003,1003,1003,1003
unique,1003,25,289,45,244,359,280,61,48
top,OnePlus 11 5G,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Dimensity 700 5G, Octa Core, 2.2 GHz Processor","8 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.67 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card Supported, upto 1 TB",Android v12
freq,1,324,29,267,103,54,40,171,287


In [16]:
# encode categorical features, except model
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()