In [1]:
import numpy as np
import pandas as pd
from ml.utils.utils import get_data, export_data

## Constants

In [2]:
# Row numbers flagged in section 1.1 ("Quality Issues") as holding misplaced
# values for the named column. The cells below match them against the
# `index` column produced by adjust_index().
processor_rows = {
    642, 647, 649, 659, 667, 701, 750, 759, 819, 859, 883, 884, 919, 927,
    929, 932, 1002,
}
ram_rows = {
    441, 485, 534, 553, 584, 610, 613, 642, 647, 649, 659, 667, 701, 750,
    759, 819, 859, 884, 919, 927, 929, 932, 990, 1002,
}
battery_rows = {
    113, 151, 309, 365, 378, 441, 450, 553, 584, 610, 613, 630, 642, 647,
    649, 659, 667, 701, 750, 756, 759, 764, 819, 855, 859, 884, 915, 916,
    927, 929, 932, 990, 1002,
}
display_rows = {
    378, 441, 450, 553, 584, 610, 613, 630, 642, 647, 649, 659, 667, 701,
    750, 759, 764, 819, 859, 884, 915, 916, 927, 929, 932, 990, 1002,
}
camera_rows = {
    100, 113, 151, 157, 161, 238, 273, 308, 309, 323, 324, 365, 367, 378,
    394, 441, 450, 484, 506, 534, 553, 571, 572, 575, 584, 610, 613, 615,
    630, 642, 647, 649, 659, 667, 684, 687, 705, 711, 723, 728, 750, 756,
    759, 764, 792, 819, 846, 854, 855, 858, 883, 884, 896, 915, 916, 927,
    929, 932, 945, 956, 990, 995, 1002, 1016,
}

In [3]:
df = get_data("smartphones", "raw")

In [3]:
df.columns

Index(['model', 'price', 'rating', 'sim', 'processor', 'ram', 'battery',
       'display', 'camera', 'card', 'os'],
      dtype='object')

In [4]:
df.head()

Unnamed: 0,model,price,rating,sim,processor,ram,battery,display,camera,card,os
0,OnePlus 11 5G,"₹54,999",89.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13
1,OnePlus Nord CE 2 Lite 5G,"₹19,989",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
2,Samsung Galaxy A14 5G,"₹16,499",75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13
3,Motorola Moto G62 5G,"₹14,999",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
4,Realme 10 Pro Plus,"₹24,999",82.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13


## 1. Data Assessing

### 1.1 Quality Issues

1. **model** - some brands are written differently, like OPPO in the model column `consistency`
2. **price** - has unnecessary '₹' `validity`
3. **price** - has ',' between numbers `validity`
4. **price** - phone Namotel has a price of 99 `accuracy`
5. **rating** - missing values `completeness`
6. **processor** - has some incorrect values for some samsung phones(row # -642,647,649,659,667,701,750,759,819,859,883,884,919,927,929,932,1002) `validity`
7. There is an iPod on row 756 `validity`
8. **memory** - incorrect values in rows (441,485,534,553,584,610,613,642,647,649,659,667,701,750,759,819,859,884,919,927,929,932,990,1002) `validity`
9. **battery** - incorrect values in rows(113,151,309,365,378,441,450,553,584,610,613,630,642,647,649,659,667,701,750,756,759,764,819,855,859,884,915,916,927,929,932,990,1002) `validity`
10. **display** - sometimes frequency is not available `completeness`
11. **display** - incorrect values in rows(378,441,450,553,584,610,613,630,642,647,649,659,667,701,750,759,764,819,859,884,915,916,927,929,932,990,1002) `validity`
12. certain phones are foldable and the info is scattered `validity`
13. **camera** - words like Dual, Triple and Quad are used to represent number of cameras and front and rear cameras are separated by '&'
14. **camera** - problem with rows (100,113,151,157,161,238,273,308,309,323,324,365,367,378,394,441,450,484,506,534,553,571,572,575,584,610,613,615,630,642,647,649,659,667,684,687,705,711,723,728,750,756,759,764,792,819,846,854,855,858,883,884,896,915,916,927,929,932,945,956,990,995,1002,1016) `validity`
15. **card** - sometimes contains info about os and camera `validity`
16. **os** - sometimes contains info about bluetooth and fm radio `validity`
17. **os** - issue with rows (324,378) `validity`
18. **os** - sometimes contains os version name like lollipop `consistency`
19. missing values in camera, card and os `completeness`
20. data type of price and rating is incorrect `validity`



### 1.2 Tidiness Issues

1. **sim** - can be split into 3 cols has_5g, has_NFC, has_IR_Blaster
2. **ram** - can be split into 2 cols RAM and ROM
3. **processor** - can be split into processor name, cores and cpu speed.
4. **battery** - can be split into battery capacity, fast_charging_available
5. **display** - can be split into size, resolution_width, resolution_height and frequency
6. **camera** - can be split into front and rear camera
7. **card** - can be split into supported, extended_upto

## 2. Preliminary Analysis

### 2.1 Check Data Types

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1020 entries, 0 to 1019
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   model      1020 non-null   object 
 1   price      1020 non-null   object 
 2   rating     879 non-null    float64
 3   sim        1020 non-null   object 
 4   processor  1020 non-null   object 
 5   ram        1020 non-null   object 
 6   battery    1020 non-null   object 
 7   display    1020 non-null   object 
 8   camera     1019 non-null   object 
 9   card       1013 non-null   object 
 10  os         1003 non-null   object 
dtypes: float64(1), object(10)
memory usage: 87.8+ KB


### 2.2 Descriptive Statistics

In [8]:
df.describe()

Unnamed: 0,rating
count,879.0
mean,78.258248
std,7.402854
min,60.0
25%,74.0
50%,80.0
75%,84.0
max,89.0


### 2.3 Check for Duplicates

In [9]:
df.duplicated().sum()

np.int64(0)

## 3. Detailed Analysis (Column wise)

### 3.1 price

In [11]:
# Summary statistics of price once the raw strings (e.g. "₹54,999") are
# parsed into integers.
(
    df.price
    .str.replace('₹', '', regex=False)  # strip the rupee sign
    .str.replace(',', '', regex=False)  # strip the digit-group separators
    .astype("int")
    .describe()
)

count      1020.000000
mean      31371.767647
std       39168.942590
min          99.000000
25%       12464.250000
50%       19815.000000
75%       34999.000000
max      650000.000000
Name: price, dtype: float64

In [16]:
# Show every row flagged in at least one of the *_rows sets.
(
    df
    .pipe(adjust_index)  # adds the `index` column (row label + 2)
    .reset_index()  # `index` already exists, so the new column is named `level_0`
    .loc[
        lambda x: x['index'].isin(
            # union: a row qualifies if any column was flagged for it
            processor_rows
            .union(ram_rows)
            .union(battery_rows)
            .union(display_rows)
            .union(camera_rows)
        )
    ]
)


Unnamed: 0,level_0,index,model,price,rating,sim,processor,ram,battery,display,camera,card,os
98,98,100,Vivo X Fold 5G,"₹1,06,990",,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen1, Octa Core, 3 GHz Processor","12 GB RAM, 256 GB inbuilt",4600 mAh Battery with 66W Fast Charging,"8.03 inches, 1916 x 2160 px, 120 Hz Display",Foldable Display,50 MP Quad Rear & 16 MP Front Camera,Android v12
111,111,113,Apple iPhone 12,"₹51,999",74.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 64 GB inbuilt","6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14,No FM Radio
149,149,151,Apple iPhone 12 Mini,"₹40,999",74.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 64 GB inbuilt","5.4 inches, 1080 x 2340 px Display",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14,No FM Radio
155,155,157,Nokia 2780 Flip,"₹4,990",,"Dual Sim, 3G, 4G, Wi-Fi","Snapdragon QM215, Quad Core, 1.3 GHz Processor","4 GB RAM, 512 MB inbuilt",1450 mAh Battery,"2.7 inches, 240 x 320 px Display",Dual Display,5 MP Rear Camera,"Memory Card Supported, upto 32 GB"
159,159,161,Oppo Find N2 5G,"₹94,990",,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",4520 mAh Battery with 67W Fast Charging,"7.1 inches, 1792 x 1920 px, 120 Hz Display wit...","Foldable Display, Dual Display",50 MP + 48 MP + 32 MP Triple Rear & 32 MP + 32...,Memory Card Not Supported
...,...,...,...,...,...,...,...,...,...,...,...,...,...
954,954,956,Vivo X Fold 5G (12GB RAM + 512GB),"₹1,18,990",,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen1, Octa Core, 3 GHz Processor","12 GB RAM, 512 GB inbuilt",4600 mAh Battery with 66W Fast Charging,"8.03 inches, 1916 x 2160 px, 120 Hz Display",Foldable Display,50 MP Quad Rear & 16 MP Front Camera,Android v12
988,988,990,Nokia 5310 Dual Sim,"₹3,399",,Dual Sim,"8 MB RAM, 16 MB inbuilt",1200 mAh Battery,"2.4 inches, 240 x 320 px Display",0.3 MP Rear Camera,"Memory Card Supported, upto 32 GB",Bluetooth,Browser
993,993,995,Huawei Mate X,"₹1,69,000",,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC, IR Bl...","Kirin 990, Octa Core, 2.86 GHz Processor","8 GB RAM, 512 GB inbuilt",4500 mAh Battery with 55W Fast Charging,"8 inches, 2200 x 2480 px Display",Foldable Display,48 MP Quad Rear Camera,"Memory Card (Hybrid), upto 256 GB"
1000,1000,1002,XTouch F40 Flip,"₹1,999",,Dual Sim,No 3G,No Wifi,"32 MB RAM, 32 MB inbuilt",800 mAh Battery,"1.77 inches, 240 x 320 px Display",Dual Display,1.3 MP Rear Camera


In [17]:
# Show only the rows flagged in all five *_rows sets at once.
(
    df
    .pipe(adjust_index)  # adds the `index` column (row label + 2)
    .reset_index()  # `index` already exists, so the new column is named `level_0`
    .loc[  
        lambda x: x['index'].isin(
            # `&` on sets is intersection: the row must be in every set
            processor_rows
            & ram_rows
            & battery_rows
            & display_rows
            & camera_rows
        )
    ]
    # .shape  (uncomment to get the row/column count instead of the frame)
)


Unnamed: 0,level_0,index,model,price,rating,sim,processor,ram,battery,display,camera,card,os
640,640,642,Nokia 105 Plus,"₹1,299",,Dual Sim,"4 MB RAM, 4 MB inbuilt",800 mAh Battery,"1.77 inches, 128 x 160 px Display",No Rear Camera,"Memory Card Supported, upto 32 GB",Bluetooth,
645,645,647,Nokia 2760 Flip,"₹5,490",,"Dual Sim, 3G, 4G, Wi-Fi",1450 mAh Battery,"3.6 inches, 240 x 320 px Display",5 MP Rear & 5 MP Front Camera,"Memory Card Supported, upto 32 GB",Kaios v3.0,Bluetooth,
647,647,649,Motorola Moto A10,"₹1,339",,Dual Sim,"4 MB RAM, 4 MB inbuilt",1750 mAh Battery,"1.8 inches, 160 x 128 px Display",No Rear Camera,"Memory Card Supported, upto 32 GB",,
657,657,659,Zanco Tiny T1,"₹2,799",,Single Sim,"32 MB RAM, 32 MB inbuilt",200 mAh Battery,"0.49 inches, 64 x 32 px Display",No Rear Camera,No FM Radio,Bluetooth,
665,665,667,itel it2163S,₹958,,Dual Sim,"4 MB RAM, 4 MB inbuilt",1200 mAh Battery,"1.8 inches, 160 x 128 px Display",No Rear Camera,"Memory Card Supported, upto 32 GB",Bluetooth,
748,748,750,Nokia 400 4G,"₹3,290",,"Dual Sim, 4G, VoLTE, Wi-Fi",2000 mAh Battery,"2.4 inches, 240 x 320 px Display",0.3 MP Rear & 0.3 MP Front Camera,"Memory Card Supported, upto 64 GB",Bluetooth,Browser,
757,757,759,Karbonn KU3i,₹995,,Dual Sim,"52 MB RAM, 32 MB inbuilt",1000 mAh Battery,"1.8 inches, 128 x 160 px Display",No Rear Camera,"Memory Card Supported, upto 16 GB",Bluetooth,
817,817,819,itel Magic X,"₹2,239",,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi",No 3G,T117,"48 MB RAM, 128 MB inbuilt",1200 mAh Battery,"2.4 inches, 240 x 320 px Display",1.3 MP Rear Camera,"Memory Card Supported, upto 64 GB"
882,882,884,Nokia 5710 XpressAudio,"₹4,799",,"Dual Sim, 3G, 4G",No Wifi,Unisoc T107,"48 MB RAM, 128 MB inbuilt",1450 mAh Battery,"2.4 inches, 240 x 320 px Display",0.3 MP Rear Camera,"Memory Card Supported, upto 32 GB"
925,925,927,Nokia 3310 4G,"₹3,999",,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","256 MB RAM, 512 MB inbuilt",1200 mAh Battery,"2.4 inches, 240 x 320 px Display",2 MP Rear Camera,"Memory Card Supported, upto 32 GB",Bluetooth,Browser


In [18]:
# Parse price to int and keep only phones priced at 3400 or more.
# NOTE(review): the 3400 cutoff is not explained anywhere in the notebook —
# presumably it is meant to drop feature phones; confirm and document.
(
    df
    .assign(
        price = lambda df_: (
            df_.price
            .str.replace('₹', '', regex=False)  # strip the rupee sign
            .str.replace(',', '', regex=False)  # strip the digit-group separators
            .astype('int')  
        )
    )
    .loc[lambda x: x['price'] >= 3400] 
    # .shape  (uncomment to get the row/column count instead of the frame)
)


Unnamed: 0,model,price,rating,sim,processor,ram,battery,display,camera,card,os
0,OnePlus 11 5G,54999,89.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13
1,OnePlus Nord CE 2 Lite 5G,19989,81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
2,Samsung Galaxy A14 5G,16499,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13
3,Motorola Moto G62 5G,14999,81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
4,Realme 10 Pro Plus,24999,82.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13
...,...,...,...,...,...,...,...,...,...,...,...
1015,Motorola Moto Edge S30 Pro,34990,83.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 8 Gen1, Octa Core, 3 GHz Processor","8 GB RAM, 128 GB inbuilt",5000 mAh Battery with 68.2W Fast Charging,"6.67 inches, 1080 x 2460 px, 120 Hz Display wi...",64 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,Android v12,No FM Radio
1016,Honor X8 5G,14990,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 480+, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 22.5W Fast Charging,"6.5 inches, 720 x 1600 px Display with Water D...",48 MP + 2 MP + Depth Sensor Triple Rear & 8 MP...,"Memory Card Supported, upto 1 TB",Android v11
1017,POCO X4 GT 5G (8GB RAM + 256GB),28990,85.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC, IR Bl...","Dimensity 8100, Octa Core, 2.85 GHz Processor","8 GB RAM, 256 GB inbuilt",5080 mAh Battery with 67W Fast Charging,"6.6 inches, 1080 x 2460 px, 144 Hz Display wit...",64 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,Memory Card Not Supported,Android v12
1018,Motorola Moto G91 5G,19990,80.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.8 inches, 1080 x 2400 px Display with Punch ...",108 MP + 8 MP + 2 MP Triple Rear & 32 MP Front...,"Memory Card Supported, upto 1 TB",Android v12


### 3.2 processor

In [19]:
# Which rows flagged for the processor column survive the price >= 3400 filter?
(
    df
    .assign(
        price = lambda df_: (
            df_.price
            .str.replace('₹', '', regex=False)  # strip the rupee sign
            .str.replace(',', '', regex=False)  # strip the digit-group separators
            .astype('int')  
        )
    )
    .loc[lambda x: x['price'] >= 3400] 
    # adjust_index runs after the filter, so `index` is still the original
    # row label + 2 and lines up with processor_rows.
    .pipe(adjust_index)  
    .reset_index()  # adds `level_0`: the position within the filtered frame
    .loc[  
        lambda x: x['index'].isin(
            processor_rows          
        )
    ]
    # .shape  (uncomment to get the row/column count instead of the frame)
)

Unnamed: 0,level_0,index,model,price,rating,sim,processor,ram,battery,display,camera,card,os
635,635,647,Nokia 2760 Flip,5490,,"Dual Sim, 3G, 4G, Wi-Fi",1450 mAh Battery,"3.6 inches, 240 x 320 px Display",5 MP Rear & 5 MP Front Camera,"Memory Card Supported, upto 32 GB",Kaios v3.0,Bluetooth,
836,836,859,LG Folder 2,11999,,"Single Sim, 3G, 4G, Wi-Fi","1 GB RAM, 8 GB inbuilt",1470 mAh Battery,"2.8 inches, 240 x 320 px Display",2 MP Rear Camera,Memory Card Supported,Bluetooth,
859,859,884,Nokia 5710 XpressAudio,4799,,"Dual Sim, 3G, 4G",No Wifi,Unisoc T107,"48 MB RAM, 128 MB inbuilt",1450 mAh Battery,"2.4 inches, 240 x 320 px Display",0.3 MP Rear Camera,"Memory Card Supported, upto 32 GB"
901,901,927,Nokia 3310 4G,3999,,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","256 MB RAM, 512 MB inbuilt",1200 mAh Battery,"2.4 inches, 240 x 320 px Display",2 MP Rear Camera,"Memory Card Supported, upto 32 GB",Bluetooth,Browser


### 3.3 ram

In [34]:
# Rows flagged for the ram column that are still present after clean_data().
(
    clean_data(df)  
    .loc[  
        lambda x: x['index'].isin(
            ram_rows          
        )
    ]
    # .shape  (uncomment to get the row/column count instead of the frame)
)

Unnamed: 0,index,model,price,rating,sim,processor,ram,battery,display,camera,card,os
439,441,Apple iPhone SE 3 2022,43900,,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A15, Hexa Core, 3.22 GHz Processor",64 GB inbuilt,"4.7 inches, 750 x 1334 px Display",12 MP Rear & 7 MP Front Camera,Memory Card Not Supported,iOS v15,No FM Radio
483,485,Huawei Mate 50 RS Porsche Design,239999,81.0,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi, NFC, IR Blaster","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor",512 GB inbuilt,4700 mAh Battery with 66W Fast Charging,"6.74 inches, 1212 x 2616 px, 120 Hz Display",50 MP + 48 MP + 13 MP Triple Rear & 13 MP Fron...,"Memory Card (Hybrid), upto 256 GB",Hongmeng OS v3.0
582,584,Nokia 8210 4G,3749,,"Dual Sim, 3G, 4G",No Wifi,Unisoc T107,"48 MB RAM, 128 MB inbuilt",1450 mAh Battery,"2.8 inches, 240 x 320 px Display",0.3 MP Rear Camera,"Memory Card Supported, upto 32 GB"


### 3.4 battery

In [48]:
# Rows flagged for the battery column that are still present after clean_data().
(
    clean_data(df)  
    .loc[  
        lambda x: x['index'].isin(
            battery_rows          
        )
    ]
    # .shape  (uncomment to get the row/column count instead of the frame)
)

Unnamed: 0,index,model,price,rating,sim,processor,ram,battery,display,camera,card,os
111,113,Apple iPhone 12,51999,74.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 64 GB inbuilt","6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14,No FM Radio
149,151,Apple iPhone 12 Mini,40999,74.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 64 GB inbuilt","5.4 inches, 1080 x 2340 px Display",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14,No FM Radio
307,309,Apple iPhone 12 (128GB),55999,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 128 GB inbuilt","6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14,No FM Radio
363,365,Apple iPhone 12 Mini (128GB),45999,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 128 GB inbuilt","5.4 inches, 1080 x 2340 px Display",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14,No FM Radio
439,441,Apple iPhone SE 3 2022,43900,,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A15, Hexa Core, 3.22 GHz Processor",64 GB inbuilt,"4.7 inches, 750 x 1334 px Display",12 MP Rear & 7 MP Front Camera,Memory Card Not Supported,iOS v15,No FM Radio
448,450,Apple iPhone 15 Pro,130990,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC",Bionic A16,"8 GB RAM, 128 GB inbuilt","6.06 inches, 1170 x 2532 px, 120 Hz Display wi...",50 MP + 12 MP + 12 MP Triple Rear & 12 MP Fron...,Memory Card Not Supported,iOS v15,No FM Radio
628,630,Apple iPhone 12 Pro (512GB),139900,80.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","6 GB RAM, 512 GB inbuilt","6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP + 12 MP Triple Rear & 12 MP Fron...,Memory Card Not Supported,iOS v14.0,No FM Radio
762,764,Apple iPhone SE 4,49990,60.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A15, Hexa Core, 3.22 GHz Processor",64 GB inbuilt,"6.1 inches, 750 x 1580 px Display",12 MP Rear & 10.8 MP Front Camera,Memory Card Not Supported,iOS v16,No FM Radio
853,855,Apple iPhone 12 Pro (256GB),119900,80.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","6 GB RAM, 256 GB inbuilt","6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP + 12 MP Triple Rear & 12 MP Fron...,Memory Card Not Supported,iOS v14.0,No FM Radio
913,915,Apple iPhone 12 Mini (256GB),55999,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 256 GB inbuilt","5.4 inches, 1080 x 2340 px Display",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14,No FM Radio


### 3.5 display

In [56]:
# Rows flagged for the display column that are still present after clean_data().
(
    clean_data(df)  
    .loc[  
        lambda x: x['index'].isin(
            display_rows          
        )
    ]
    # .shape  (uncomment to get the row/column count instead of the frame)
)

Unnamed: 0,index,model,price,rating,sim,processor,ram,battery,display,camera,card,os
439,441,Apple iPhone SE 3 2022,43900,,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A15, Hexa Core, 3.22 GHz Processor",64 GB inbuilt,,"4.7 inches, 750 x 1334 px Display",12 MP Rear & 7 MP Front Camera,Memory Card Not Supported,iOS v15
448,450,Apple iPhone 15 Pro,130990,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC",Bionic A16,"8 GB RAM, 128 GB inbuilt",,"6.06 inches, 1170 x 2532 px, 120 Hz Display wi...",50 MP + 12 MP + 12 MP Triple Rear & 12 MP Fron...,Memory Card Not Supported,iOS v15
628,630,Apple iPhone 12 Pro (512GB),139900,80.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","6 GB RAM, 512 GB inbuilt",,"6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP + 12 MP Triple Rear & 12 MP Fron...,Memory Card Not Supported,iOS v14.0
762,764,Apple iPhone SE 4,49990,60.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A15, Hexa Core, 3.22 GHz Processor",64 GB inbuilt,,"6.1 inches, 750 x 1580 px Display",12 MP Rear & 10.8 MP Front Camera,Memory Card Not Supported,iOS v16
913,915,Apple iPhone 12 Mini (256GB),55999,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 256 GB inbuilt",,"5.4 inches, 1080 x 2340 px Display",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14
914,916,Apple iPhone 12 (256GB),67999,76.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 256 GB inbuilt",,"6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14


In [57]:
len(display_rows)

27

In [58]:
len(camera_rows)

64

### 3.6 camera

In [60]:
# Rows flagged for the camera column that are still present after clean_data().
(
    clean_data(df)  
    .loc[  
        lambda x: x['index'].isin(
            camera_rows          
        )
    ]
    # .shape  (uncomment to get the row/column count instead of the frame)
)

# 155 271 — row labels noted from this output; they match the labels removed
# by `.drop([155, 271])` in clean_data (presumably recorded here for that step).

Unnamed: 0,index,model,price,rating,sim,processor,ram,battery,display,camera,card,os
98,100,Vivo X Fold 5G,106990,,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen1, Octa Core, 3 GHz Processor","12 GB RAM, 256 GB inbuilt",,,,,
111,113,Apple iPhone 12,51999,74.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 64 GB inbuilt",,"6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14
149,151,Apple iPhone 12 Mini,40999,74.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 64 GB inbuilt",,"5.4 inches, 1080 x 2340 px Display",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14
155,157,Nokia 2780 Flip,4990,,"Dual Sim, 3G, 4G, Wi-Fi","Snapdragon QM215, Quad Core, 1.3 GHz Processor","4 GB RAM, 512 MB inbuilt",,,,,
159,161,Oppo Find N2 5G,94990,,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",,,,,
236,238,Xiaomi Mix Fold 2 5G,106990,,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC, IR Bl...","Snapdragon 8+ Gen1 , Octa Core, 3.2 GHz Proce...","12 GB RAM, 256 GB inbuilt",,,,,
271,273,Nokia 2720 V Flip,6199,,"Dual Sim, 3G, 4G, VoLTE, Wi-Fi","Snapdragon 205 , Dual Core, 1.1 GHz Processor","512 MB RAM, 4 GB inbuilt",,,,,
306,308,Samsung Galaxy Z Flip 3,69999,84.0,"Single Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 888, Octa Core, 2.84 GHz Processor","8 GB RAM, 128 GB inbuilt",,,,,
307,309,Apple iPhone 12 (128GB),55999,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Bionic A14, Hexa Core, 3.1 GHz Processor","4 GB RAM, 128 GB inbuilt",,"6.1 inches, 1170 x 2532 px Display with Large ...",12 MP + 12 MP Dual Rear & 12 MP Front Camera,Memory Card Not Supported,iOS v14
321,323,Samsung Galaxy Z Fold 4,154998,,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8+ Gen1, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",,,,,


### 3.7 card

In [47]:
(
    clean_data(df)
    .card
    .value_counts()
)

card
Memory Card Not Supported              362
Memory Card Supported, upto 1 TB       171
Memory Card Supported, upto 512 GB     105
Memory Card (Hybrid), upto 1 TB         91
Memory Card Supported                   89
Memory Card Supported, upto 256 GB      87
Memory Card (Hybrid)                    30
Memory Card (Hybrid), upto 256 GB       13
Memory Card (Hybrid), upto 512 GB       11
Memory Card Supported, upto 128 GB       6
Memory Card Supported, upto 2 TB         5
Memory Card Supported, upto 32 GB        4
Memory Card (Hybrid), upto 128 GB        3
Memory Card (Hybrid), upto 64 GB         3
Memory Card Supported, upto 1000 GB      1
Memory Card (Hybrid), upto 2 TB          1
Name: count, dtype: int64

### 3.8 os

In [57]:
(
    clean_data(df)
    .os
    .value_counts()
)

os
Android v12                           394
Android v11                           274
Android v13                            91
Android v10                            69
Android v9.0 (Pie)                     29
Android v10.0                          23
iOS v16                                15
iOS v15                                12
Android v8.1 (Oreo)                    10
iOS v14                                 6
Memory Card Not Supported               6
Android v8.0 (Oreo)                     4
Android v11.0                           4
iOS v13                                 4
iOS v15.0                               3
Memory Card (Hybrid), upto 2 TB         2
Memory Card Supported, upto 256 GB      2
Android v5.1.1 (Lollipop)               2
Android v12.1                           2
HarmonyOS v2.0                          2
iOS v14.0                               2
EMUI v12                                2
Android v6.0 (Marshmallow)              2
Memory Card (Hybrid), upto 256 

In [61]:
(
    clean_data(df)
    .os
    .value_counts()
)

os
Android v12                           394
Android v11                           274
Android v13                            91
Android v10                            69
Android v9.0 (Pie)                     29
Android v10.0                          23
iOS v16                                15
iOS v15                                12
Android v8.1 (Oreo)                    10
iOS v14                                 6
Memory Card Not Supported               6
Android v11.0                           4
iOS v13                                 4
Android v8.0 (Oreo)                     4
iOS v15.0                               3
Android v5.1.1 (Lollipop)               2
Memory Card Supported, upto 256 GB      2
Android v12.1                           2
HarmonyOS v2.0                          2
Memory Card (Hybrid), upto 2 TB         2
EMUI v12                                2
Android v6.0 (Marshmallow)              2
Memory Card (Hybrid), upto 256 GB       2
iOS v14.0                      

### 3.9 display

In [64]:
(
    clean_data(df)
    .display
    .value_counts()
)

display
6.67 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole    54
6.5 inches, 720 x 1600 px Display with Water Drop Notch        36
6.7 inches, 1080 x 2412 px, 120 Hz Display with Punch Hole     25
6.52 inches, 720 x 1600 px Display with Water Drop Notch       23
6.5 inches, 1080 x 2400 px, 90 Hz Display with Punch Hole      22
                                                               ..
5.86 inches, 720 x 1520 px Display with Large Notch             1
6.43 inches, 1440 x 3200 px, 120 Hz Display with Punch Hole     1
6.6 inches, 1080 x 2400 px, 144 Hz Display                      1
6.73 inches, 1440 x 3120 px, 120 Hz Display with Punch Hole     1
6.8 inches, 1080 x 2400 px Display with Punch Hole              1
Name: count, Length: 347, dtype: int64

In [None]:
(982/1020)*100

96.27450980392157

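## 4. Cleaning Operations

> Note: going by the execution counts, the helper cells in this section were run before the Section 3 cells that call them, so on a fresh kernel run this section first.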

In [5]:
def adjust_index(df):
    """Return a copy of ``df`` with its row labels exposed as an ``index`` column, plus 2.

    ``reset_index()`` moves the current index into a column (called ``index``
    when the index is unnamed) and installs a fresh positional index. Every
    value of that column is then increased by 2. The ``*_rows`` sets are
    compared against this column.

    NOTE(review): the +2 presumably maps 0-based labels onto spreadsheet row
    numbers (header row plus 1-based counting) — confirm.
    """
    flattened = df.reset_index()
    flattened['index'] = flattened['index'] + 2
    return flattened

In [6]:
def shift_battery_to_display(df, rows=None, first_col=7):
    """Shift the trailing columns of selected rows one position to the right.

    For every row whose ``index`` value is in ``rows``, the values in the
    columns from position ``first_col`` onwards each move one column to the
    right: the first of those columns becomes NaN and the value previously
    held in the last column is discarded. All other rows are left as they are.

    Unlike the earlier version, the input frame is not modified; a corrected
    copy is returned, so re-running a cell cannot shift the same rows twice.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``index`` column (as produced by ``adjust_index``).
    rows : collection of int, optional
        ``index`` values of the rows to shift. Defaults to the module-level
        ``battery_rows``.
    first_col : int, default 7
        Position of the first column taking part in the shift. With the column
        layout produced by ``adjust_index`` on the raw data, position 7 is
        ``battery``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the selected rows shifted.
    """
    target_rows = battery_rows if rows is None else rows

    result = df.copy()
    selected = result['index'].isin(target_rows)
    if not selected.any():
        # Nothing to repair; skip the assignment entirely.
        return result

    tail_cols = result.columns[first_col:]
    shifted = result.loc[selected, tail_cols].shift(1, axis=1)

    # Assign raw values so pandas does not try to re-align on column labels.
    result.loc[selected, tail_cols] = shifted.values
    return result



In [9]:
def update_card_columns(df, rows=None):
    """Move camera / OS text that ended up in the wrong column back into place.

    The steps run in this order on a copy of ``df``:

    1. For rows whose ``index`` is in ``rows`` and whose ``camera`` text does
       not contain ``"MP"``, copy ``card`` into ``camera``.
    2. Wherever ``card`` contains ``"MP"`` (camera text), set ``card`` to
       ``"Memory Card Not Supported"``.
    3. Wherever ``card`` holds text that does not contain ``"Memory Card"``,
       copy that text into ``os``. A missing ``card`` is skipped here so that
       an existing ``os`` value is not overwritten with NaN.
    4. Wherever ``card`` does not contain ``"Memory Card"`` (missing values
       included), set ``card`` to ``"Memory Card Not Supported"``.

    Unlike the earlier version, the input frame is not modified and each
    mask is computed only once.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``index``, ``camera``, ``card`` and ``os``.
    rows : collection of int, optional
        ``index`` values eligible for step 1. Defaults to the module-level
        ``camera_rows``.

    Returns
    -------
    pandas.DataFrame
        A corrected copy of ``df``.
    """
    target_rows = camera_rows if rows is None else rows
    not_supported = 'Memory Card Not Supported'

    result = df.copy()

    def _has(column, token):
        # Missing values count as "does not contain"; tokens are plain text.
        return (
            result[column]
            .fillna("")
            .astype(str)
            .str.contains(token, regex=False)
        )

    # Step 1: the camera spec sits in `card` for flagged rows without "MP".
    camera_in_card = result['index'].isin(target_rows) & ~_has('camera', 'MP')
    result.loc[camera_in_card, 'camera'] = result.loc[camera_in_card, 'card']

    # Step 2: camera text left in `card` is replaced by the default card label.
    result.loc[_has('card', 'MP'), 'card'] = not_supported

    # Steps 3 and 4 share one mask; it must be computed after step 2.
    not_card_text = ~_has('card', 'Memory Card')

    # Step 3: only real text is moved, so a NaN card never clobbers `os`.
    movable = not_card_text & result['card'].notna()
    result.loc[movable, 'os'] = result.loc[movable, 'card']

    # Step 4: anything that is not card text (including NaN) gets the default.
    result.loc[not_card_text, 'card'] = not_supported

    return result

In [23]:
def shift_processor_info(df):
    """Shift processor fields right for rows whose name slot holds the core count.

    When ``processor_name`` contains ``"Core"``, the three columns
    ``processor_name``, ``num_cores`` and ``processor_speed`` are shifted one
    position to the right for that row: ``processor_name`` becomes NaN,
    ``num_cores`` receives the old ``processor_name`` and ``processor_speed``
    receives the old ``num_cores``.

    Rows with a missing ``processor_name`` are treated as non-matching
    (``na=False``); previously they produced NaN in the mask, which makes
    boolean indexing raise. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string columns ``processor_name``, ``num_cores`` and
        ``processor_speed``.

    Returns
    -------
    pandas.DataFrame
        A corrected copy of ``df``. If no row matches, a message is printed
        and an unchanged copy is returned.
    """
    processor_cols = ['processor_name', 'num_cores', 'processor_speed']

    result = df.copy()
    misplaced = result['processor_name'].str.contains('Core', na=False)

    if not misplaced.any():
        print("No processors containing 'Core' found. Skipping shift.")
        return result

    shifted = result.loc[misplaced, processor_cols].shift(1, axis=1)
    # Raw values avoid any index/column re-alignment during assignment.
    result.loc[misplaced, processor_cols] = shifted.to_numpy()
    return result

In [24]:
def fast_charging_extractor(df):
    """Collapse each element of ``df`` into a single fast-charging value.

    ``df`` is applied element-wise (presumably a Series of ``str.findall``
    results — confirm against the caller). Each element maps as follows:

    * a list with exactly one entry -> that entry
    * any other list (empty, or more than one entry) -> ``0``
    * anything that is not a list (e.g. NaN) -> ``-1``
    """
    def _decode(item):
        if not isinstance(item, list):
            return -1
        if len(item) == 1:
            return item[0]
        return 0

    return df.apply(_decode)

In [25]:
def camera_extractor(df):
    """Map each camera description in ``df`` to a camera-count label.

    The first keyword found, in this order, decides the label:
    ``"Quad"`` -> ``'4'``, ``"Triple"`` -> ``'3'``, ``"Dual"`` -> ``'2'``,
    ``"Missing"`` -> ``'Missing'``. Text containing none of them -> ``'1'``.
    Elements must be strings; the labels are returned as strings too.
    """
    # Order matters: earlier keywords win when several are present.
    keyword_labels = (
        ('Quad', '4'),
        ('Triple', '3'),
        ('Dual', '2'),
        ('Missing', 'Missing'),
    )

    def _label(text):
        for keyword, label in keyword_labels:
            if keyword in text:
                return label
        return '1'

    return df.apply(_label)

In [26]:
def os_extractor(df):
    """Return a copy of ``df`` whose ``os`` column is reduced to an OS family.

    Each ``os`` string is checked in this order:

    * contains ``"Android"`` -> ``'android'``
    * contains ``"iOS"`` -> ``'ios'``
    * contains ``"Not Specified"`` -> left unchanged
    * contains ``"Harmony"``, ``"Hongmeng"`` or ``"EMUI"`` -> ``'other'``
    * anything else -> left unchanged

    Every ``os`` value must be a string. The input frame is not modified.
    """
    other_markers = ['Harmony', 'Hongmeng', 'EMUI']

    def _family(text):
        if 'Android' in text:
            return 'android'
        if 'iOS' in text:
            return 'ios'
        if 'Not Specified' in text:
            return text
        for marker in other_markers:
            if marker in text:
                return 'other'
        return text

    normalised = df['os'].apply(_family)
    return df.assign(os=normalised)

In [27]:
def clean_data(df):
    """Clean the raw smartphone listings and derive per-spec feature columns.

    The whole transformation is one method chain on ``df``:

    1. ``price`` has '₹' and ',' removed and is cast to int.
    2. ``adjust_index``, ``shift_battery_to_display`` and ``update_card_columns``
       (helpers defined elsewhere in this notebook) are piped in, interleaved
       with ``.drop`` calls on hard-coded row labels.
    3. Feature columns are parsed out of the free-text ``model``, ``sim``,
       ``processor``, ``ram``, ``battery``, ``display``, ``camera`` and ``card``
       columns.
    4. ``os`` is normalised by ``os_extractor`` and the columns 'index', 'sim',
       'processor', 'ram', 'battery', 'display', 'camera' and 'card' are dropped.

    NOTE(review): the row labels passed to ``.drop`` are hard-coded and the
    reason each row is removed is not recorded here. ``.drop`` raises
    ``KeyError`` for an absent label, so the steps depend on running in exactly
    this order against the expected dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw listings; the chain reads the columns price, model, sim, processor,
        ram, battery, display, camera, card and os.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with the derived feature columns added and the raw
        text columns listed in step 4 removed.
    """
    return (
        df.assign(
            price=lambda df_: (
                df_.price
                .str.replace('₹', '', regex=False)  # strip the currency sign
                .str.replace(',', '', regex=False)
                .astype('int')
            )
        )
        # Helper defined elsewhere; presumably creates the 'index' column that
        # is dropped at the end of the chain — TODO confirm.
        .pipe(adjust_index)
        .loc[lambda df_: df_['price'] >= 3400] # keep rows priced at 3400 or more
        # Hard-coded row labels; the reason for each removal is not recorded here.
        .drop([645,857,882,925])
        .drop(582)
        .drop([376,754])
        .pipe(shift_battery_to_display)
        .drop([155, 271])
        .pipe(update_card_columns)
        .assign(
            # Values that are exactly 'Bluetooth' are replaced with NaN.
            os=lambda df_: df_['os'].where(df_['os'] != 'Bluetooth', np.nan),
            # First space-separated word of the model, lower-cased.
            brand_name=lambda df_: df_['model'].str.split(' ').str.get(0).str.lower(),
            # Connectivity flags: substring tests on the 'sim' text.
            has_5g=lambda df_: df_['sim'].str.contains('5G'),
            has_nfc=lambda df_: df_['sim'].str.contains('NFC'),
            has_ir_blaster=lambda df_: df_['sim'].str.contains('IR Blaster'),
            # 'processor' is split on ',' into name / cores / speed (parts 0, 1, 2).
            processor_name=lambda df_: df_['processor'].str.split(',').str.get(0).str.strip(),
            num_cores=lambda df_: df_['processor'].str.split(',').str.get(1),
            processor_speed=lambda df_: df_['processor'].str.split(',').str.get(2)
        )
        .pipe(shift_processor_info)  # realign rows whose processor_name contains 'Core'
        .assign(
            # First word of the processor name, lower-cased.
            processor_brand=lambda df_: df_['processor_name'].str.split(' ').str.get(0).str.lower(),
            # Drop the trailing ' Processor' from the Octa/Hexa core labels.
            num_cores=lambda df_: df_['num_cores'].str.strip().str.replace('Octa Core Processor','Octa Core').str.replace('Hexa Core Processor','Hexa Core'),
            # Leading token as float; a thin space (U+2009) is also treated as a separator.
            processor_speed=lambda df_: df_['processor_speed'].str.strip().str.split(' ').str.get(0).str.replace('\u2009',' ').str.split(' ').str.get(0).astype(float),
            # First integer in the part of 'ram' before the first comma.
            ram_capacity=lambda df_: df_['ram'].str.strip().str.split(',').str.get(0).str.findall(r'\b(\d+)\b').str.get(0).astype(float),
            # First integer in the part of 'ram' after the first comma.
            internal_memory=lambda df_: df_['ram'].str.strip().str.split(',').str.get(1).str.strip().str.findall(r'\b(\d+)\b').str.get(0).astype(float)
        )
        # Hard-coded row labels; reason not recorded here.
        .drop([486,627])
        # A value of 1 becomes 1024 — presumably 1 TB expressed in GB; TODO confirm.
        .pipe(lambda df_: df_.assign(internal_memory=df_['internal_memory'].replace(1, 1024)))
        .assign(
            # First integer in the battery text before 'with'.
            battery_capacity=lambda df_: df_['battery'].str.strip().str.split('with').str.get(0).str.strip().str.findall(r'\b(\d+)\b').str.get(0).astype(float),
            # 2-3 digit runs in the text after 'with', reduced to one value per row.
            fast_charging = lambda df_: (
                            df_['battery']
                            .str.strip()
                            .str.split('with')
                            .str.get(1)
                            .str.strip()
                            .str.findall(r'\d{2,3}')
                            .pipe(fast_charging_extractor)  # single match -> value, other list -> 0, non-list -> -1
            ),
            # 'display' is split on ',': part 0 = size, part 1 = resolution, part 2 = refresh rate.
            screen_size = lambda df_: (df_['display'].str.strip().str.split(',').str.get(0).str.strip().str.split(' ').str.get(0).astype(float)),
            resolution = lambda df_: (df_['display'].str.strip().str.split(',').str.get(1).str.strip().str.split('px').str.get(0)),
            # Falls back to 60 when no 2-3 digit number is found in part 2.
            refresh_rate = lambda df_: (df_['display'].str.strip().str.split(',').str.get(2).str.strip().str.findall(r'\d{2,3}').str.get(0).apply(lambda x: 60 if pd.isna(x) else x).astype(int)),
            # 'camera' is split on '&': part 0 is used for the rear fields, part 1 for the front fields.
            num_rear_cameras = lambda df_: (df_['camera'].str.strip().str.split('&').str.get(0).pipe(camera_extractor)),
            # A missing part 1 is filled with 'Missing' before extraction.
            num_front_cameras = lambda df_: (df_['camera'].str.strip().str.split('&').str.get(1).str.strip().fillna('Missing').pipe(camera_extractor)),
            # Leading token of the camera text (thin space also treated as a separator).
            primary_camera_rear = lambda df_: (df_['camera'].str.split(' ').str.get(0).str.replace('\u2009',' ').str.split(' ').str.get(0)),
            primary_camera_front= lambda df_: (df_['camera'].str.split('&').str.get(1).str.strip().str.split(' ').str.get(0).str.replace('\u2009',' ').str.split(' ').str.get(0)),
            # '0' when the card text contains 'Not'; otherwise the text after the
            # last 'upto'. A bare 'Memory Card Supported' becomes 'Not Specified'.
            extended_memory = lambda df_: (df_['card'].apply(lambda x:'0' if 'Not' in x else x.split('upto')).str.get(-1).str.strip().str.replace('Memory Card Supported','Not Specified'))
        )
        .pipe(os_extractor)
        .drop(columns=['index','sim','processor','ram','battery','display','camera','card'])
    )


In [29]:
df_clean_1 = clean_data(df)

# Manual corrections for individual rows, addressed by index label.
df_clean_1.loc[856, 'processor_name'] = 'Mediatek MT6739'

# Both columns are built with .astype(float) in clean_data, so write numbers:
# the string '512' would push the whole internal_memory column to object dtype.
df_clean_1.loc[[483], ['ram_capacity', 'internal_memory']] = [12.0, 512.0]

# NOTE(review): clean_data() drops 'camera' and 'card', so these two writes
# re-create them via .loc enlargement as columns that are NaN everywhere except
# row 69 (hence 26 columns in the shape below). They are kept so the exported
# schema does not change; the intended fix probably belongs on the derived
# camera / extended_memory columns or on the raw row before cleaning — confirm.
df_clean_1.loc[69, 'camera'] = '50 MP'
df_clean_1.loc[69, 'card'] = 'Not Specified'
df_clean_1.loc[69, 'os'] = 'Not Specified'


In [30]:
# Display the (rows, columns) of the cleaned frame as a quick size check.
df_clean_1.shape

(980, 26)

## 4. Export the cleaned data

In [18]:
# Hand the cleaned frame to the project's export helper. Per the recorded
# output, this wrote data\interim\smartphone_cleaned_v1.csv.
export_data(dataframe = df_clean_1, dir_name="interim", name="smartphone_cleaned_v1")

Data successfully exported to: E:\Learnings\Learning_curves\Smartphone_spec_score_analysis\data\interim\smartphone_cleaned_v1.csv
