Importing necessary libraries

In [2]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as lr
from sklearn.linear_model import LogisticRegression as lgr
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler as ss
from sklearn.preprocessing import OrdinalEncoder as oe
from sklearn.preprocessing import OneHotEncoder as ohe
from sklearn.preprocessing import LabelEncoder as le
from sklearn.compose import ColumnTransformer as ct
from sklearn.impute import SimpleImputer as si
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn.ensemble import RandomForestClassifier as rfc
from sklearn.ensemble import RandomForestRegressor as rfr
from sklearn.ensemble import AdaBoostClassifier as abc
from sklearn.metrics import accuracy_score as acs
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn.tree import DecisionTreeRegressor as dtr
from sklearn.svm import SVC

Importing the main dataset

In [3]:
# Load the mobile-phone workbook into D1, the base DataFrame for this notebook.
SOURCE_WORKBOOK = "Processed_Flipdata.xlsx"
D1 = pd.read_excel(SOURCE_WORKBOOK)

Exploring the dataset

In [4]:
# Print the schema summary of D1: row count, column names, non-null counts and dtypes.
D1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed        541 non-null    int64  
 1   Model          541 non-null    object 
 2   Colour         541 non-null    object 
 3   Memory         541 non-null    int64  
 4   RAM            541 non-null    int64  
 5   Battery_       541 non-null    int64  
 6   Rear Camera    541 non-null    object 
 7   Front Camera   541 non-null    object 
 8   AI Lens        541 non-null    int64  
 9   Mobile Height  541 non-null    float64
 10  Processor_     541 non-null    object 
 11  Prize          541 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 50.8+ KB


In [5]:
# Preview the first five rows to see what each column looks like.
D1.head(n=5)

Unnamed: 0,Unnamed,Model,Colour,Memory,RAM,Battery_,Rear Camera,Front Camera,AI Lens,Mobile Height,Processor_,Prize
0,0,Infinix SMART 7,Night Black,64,4,6000,13MP,5MP,1,16.76,Unisoc Spreadtrum SC9863A1,7299
1,1,Infinix SMART 7,Azure Blue,64,4,6000,13MP,5MP,1,16.76,Unisoc Spreadtrum SC9863A1,7299
2,2,MOTOROLA G32,Mineral Gray,128,8,5000,50MP,16MP,0,16.64,Qualcomm Snapdragon 680,11999
3,3,POCO C50,Royal Blue,32,2,5000,8MP,5MP,0,16.56,Mediatek Helio A22,5649
4,4,Infinix HOT 30i,Marigold,128,8,5000,50MP,5MP,1,16.76,G37,8999


Dropping unnecessary columns

In [6]:
# Remove the "Unnamed" column; in the preview above it appears to just repeat the row number.
# `columns=` already selects the axis, so the extra `axis=1` argument was redundant.
# errors="ignore" keeps this cell safe to re-run: D1 is reassigned here, so a second
# execution would otherwise raise KeyError because the column is already gone.
D1 = D1.drop(columns=["Unnamed"], errors="ignore")

In [7]:
# Count missing values per column (isna is the same check as isnull).
missing_mask = D1.isna()
missing_mask.sum()

Model            0
Colour           0
Memory           0
RAM              0
Battery_         0
Rear Camera      0
Front Camera     0
AI Lens          0
Mobile Height    0
Processor_       0
Prize            0
dtype: int64

In [8]:
# Show the column labels that remain after the "Unnamed" column was dropped.
D1.columns

Index(['Model', 'Colour', 'Memory', 'RAM', 'Battery_', 'Rear Camera',
       'Front Camera', 'AI Lens', 'Mobile Height', 'Processor_', 'Prize'],
      dtype='object')

In [9]:
# Distinct values of the "Memory" column, in order of first appearance.
memory_values = D1["Memory"]
memory_values.unique()

array([ 64, 128,  32, 256,  16])

In [10]:
# Distinct values of the "RAM" column, in order of first appearance.
ram_values = D1["RAM"]
ram_values.unique()

array([4, 8, 2, 6, 3])

In [11]:
# Distinct values of the "Battery_" column, in order of first appearance.
battery_values = D1["Battery_"]
battery_values.unique()

array([6000, 5000, 4500,  800, 1900, 1000, 4400, 1020, 4410, 5080, 2550,
       4980, 4600, 1200, 1500, 3300, 4020, 4300, 4800, 4200, 4610, 4050,
       4270, 4000, 4030, 4115, 3000, 3110, 4700, 5020, 4323, 4830, 5200,
       7000, 2815, 3900, 3100])

In [12]:
# Distinct values of the "Rear Camera" column (stored as strings such as '13MP').
rear_camera_values = D1["Rear Camera"]
rear_camera_values.unique()

array(['13MP', '50MP', '8MP', '40MP', '32MP', '48MP', '64MP', '108MP',
       '0MP', '2MP', '12MP', '200MP', '5MP', '16MP'], dtype=object)

In [13]:
# Distinct values of the "Front Camera" column (stored as strings such as '5MP').
front_camera_values = D1["Front Camera"]
front_camera_values.unique()

array(['5MP', '16MP', '8MP', '13MP', '0MP', '32MP', '10MP', '50MP',
       '60MP', '20MP', '44MP', '2MP', '12MP'], dtype=object)

In [14]:
# Distinct values of the "Mobile Height" column, in order of first appearance.
height_values = D1["Mobile Height"]
height_values.unique()

array([16.76, 16.64, 16.56, 17.02, 16.51, 16.71, 17.04, 16.21, 16.94,
       16.33, 41.94, 17.07,  4.5 ,  7.11, 16.26,  6.1 , 15.6 , 16.81,
        5.08, 16.59, 16.43, 16.66, 17.22, 16.36, 17.32, 15.46,  4.57,
       16.54, 15.49, 16.  , 15.7 , 16.69, 15.8 , 13.84, 17.53, 16.55,
       17.65, 12.7 ])

In [15]:
# Distinct raw colour names; these are collapsed into a few base colours further down.
colour_values = D1["Colour"]
colour_values.unique()

array(['Night Black', 'Azure Blue', 'Mineral Gray', 'Royal Blue',
       'Marigold', 'Glacier Blue', 'Mirror Black', 'Diamond White',
       'Shadow Black', 'Emerald Green', 'Waterfall Blue', 'Opal Green',
       'Power Black', 'Country Green', 'Cosmic Black', 'Aurora Gold',
       'Marine Blue', 'Glimmer Black', 'Green Apple', 'Black',
       'Light Blue', 'Light Green', 'Cool Blue', 'Forest Green',
       'Nightsky Green', 'Matte Charcoal', 'Eco Black', 'Caribbean Green',
       'Jade Purple', 'Satin Silver', 'Sunrise Orange', 'Sunrise Copper',
       'Arctic Blue', 'Lavender Blue', 'Velocity Wave', 'Nitro Blaze',
       'Frosted Blue', 'Yellow', 'Midnight Gray', 'Creamy White',
       'Sandy Gold', 'Aqua Blue', 'GOAT Green', 'Midnight Blue',
       'Night Sea', 'Bamboo Green', 'Lake Blue', 'Denim Black',
       'Wildcat Blue', 'Icy Blue', 'Flashy Black', 'Sunshower',
       'Rainy Night', 'Rainforest', 'B.A.E. Purple', 'Peppy Purple',
       'Lucent White', 'Dark Matter', 'Nebula Bl

In [16]:
# Work on an independent copy so the colour clean-up in the following cells does not
# also rewrite D1; plain assignment (`D2 = D1`) only binds a second name to the same
# DataFrame object, so every change made through D2 would show up in D1 as well.
D2 = D1.copy()

In [17]:
# Collapse the marketing colour names in D2["Colour"] into a small set of base colours.
#
# Each rule is (substring, base colour). Rules are applied top to bottom, and every rule
# sees the result of the previous ones, so ORDER MATTERS: once a value has been replaced
# by a base colour it can only be matched again by a rule for that same word
# (e.g. "Midnight Blue" becomes "Blue" long before the "Midnight" -> "Black" rule runs).
#
# Matching is a case-insensitive substring search, so "Gold" also catches "Marigold".
# NOTE(review): for the same reason the "Red" rule matches any still-unmapped name that
# merely contains the letters "red" — confirm that this is intended for the data at hand.
#
# The original cell applied the "Blue" rule a second time right after "Black"; that pass
# could never change anything (every value containing "blue" was already exactly "Blue"),
# so it is not repeated here.
COLOUR_RULES = [
    ("Blue", "Blue"),
    ("Cyan", "Blue"),
    ("Black", "Black"),
    ("Gray", "Gray"),
    ("Lime", "Green"),
    ("White", "White"),
    ("Green", "Green"),
    ("Gold", "Gold"),
    ("Charcoal", "Black"),
    ("Purple", "Purple"),
    ("Silver", "Silver"),
    ("Orange", "Orange"),
    ("Copper", "Copper"),
    ("Grey", "Gray"),
    ("Aqua Sky", "Blue"),
    ("Chalk", "White"),
    ("Snowfall", "White"),
    ("Red", "Red"),
    ("Metallic Rose", "Red"),
    ("Lemon", "Green"),
    ("Midnight", "Black"),
    ("Graphite", "Gray"),
    ("Violet", "Violet"),
]

for _substring, _base_colour in COLOUR_RULES:
    # na=False treats missing colour names as "no match", leaving them untouched.
    _matches = D2["Colour"].str.contains(_substring, case=False, na=False)
    D2['Colour'] = np.where(_matches, _base_colour, D2["Colour"])

In [18]:
# Base colours that are kept as their own category.
Colours = [
    'Red',
    'Blue',
    'Black',
    'Gray',
    'Green',
    'White',
    'Gold',
    'Purple',
    'Silver',
    'Orange',
    'Copper',
    'Violet',
]

In [19]:
# Any colour that is not one of the base colours in `Colours` is lumped into "Other".
is_base_colour = D2["Colour"].isin(Colours)
D2.loc[~is_base_colour, "Colour"] = "Other"

In [20]:
# Save the frame with the collapsed colour labels to disk.
# No `index=` argument is given, so pandas also writes the row index as a leading column.
COLOUR_CSV_PATH = "Colourchanged.csv"
D2.to_csv(COLOUR_CSV_PATH)

In [21]:
# One-hot encode every column except the target "Prize" and display the result.
# The encoded frame is only shown here; it is not assigned to a variable.
columns_to_encode = [
    'Model',
    'Colour',
    'Memory',
    'RAM',
    'Battery_',
    'Rear Camera',
    'Front Camera',
    'AI Lens',
    'Mobile Height',
    'Processor_',
]
pd.get_dummies(D2, columns=columns_to_encode, drop_first=False)

Unnamed: 0,Prize,Model_APPLE iPhone 11,Model_APPLE iPhone 12,Model_APPLE iPhone 14 Plus,Model_Google Pixel 6a,Model_Google Pixel 7,Model_Google Pixel 7a,Model_I Kall Z19Pro,Model_I Kall Z19Pro Flash blue,Model_IQOO Neo 7 5G,...,Processor__Unisoc Spreadtrum SC9863A1,Processor__Unisoc T606,Processor__Unisoc T610,Processor__Unisoc T612,Processor__Unisoc T612 processor,Processor__Unisoc T616,Processor__Unisoc Tiger T612 (12 nm),Processor__Unisoc Tiger T616,Processor__ios,Processor__snapdragon 888
0,7299,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
1,7299,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
2,11999,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,5649,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,8999,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,79999,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
537,5998,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
538,9990,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
539,38999,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
