# Import necessary libraries

In [1]:
# Import pandas library for data manipulation and analysis
import pandas as pd
# Import numpy for numerical operations
import numpy as np
# Import regular expression module for text pattern matching
import re
# Import BeautifulSoup for parsing HTML and XML documents
from bs4 import BeautifulSoup
# Import warnings module to manage warning messages
import warnings
# Import requests library for making HTTP requests
import requests
# Suppress warning messages to keep output clean
warnings.filterwarnings('ignore')

### Sending an HTTP GET request to Jumia Nigeria's Apple mobile phones page 
### And retrieve the HTML content as text for further processing

In [2]:
# Send an HTTP GET request to Jumia Nigeria's Apple mobile phones page
# and retrieve the HTML content as text for further processing
source = requests.get('https://www.jumia.com.ng/mobile-phones/apple/#catalog-listing').text

### Creating a BeautifulSoup object from the HTML source using the lxml parser

In [3]:
# Create a BeautifulSoup object from the HTML source
# using the 'lxml' parser for efficient HTML parsing
soup = BeautifulSoup(source, 'lxml')

### Finding the first 'article' HTML element with class attributes "prd _fb col c-prd"

In [4]:
# Find the article element with class attributes "prd _fb col c-prd"
# This targets the product container on a webpage
Article = soup.find('article', class_="prd _fb col c-prd")

### printing the HTML structure of the 'Article' object in a more readable format

In [5]:
# This line is commented out, but would print the HTML structure of the 'Article' element
# in a more readable format using BeautifulSoup's prettify() method
#print(Article.prettify())

### Extract the phone info from the article by finding the h3 element with class "name"

In [6]:
# Extract the phone name/info from the article by finding the h3 element with class "name"
# and get its text content with whitespace stripped
phone_info = Article.find('h3', class_="name").get_text(strip=True)
phone_info  # Display the extracted phone information

'Apple IPhone 14 Pro Max 6.7" (256GB ROM + 6GB RAM) Nano Sim-Purple'

### Extract the price text from the HTML element with class "prc"

In [7]:
# Extract the price text from the HTML element with class "prc"
# Remove spaces and strip whitespace characters from the extracted text
phone_price = Article.find('div', class_="prc").get_text().replace(' ', '').strip()
phone_price  # Return the cleaned price text

'₦1,400,000'

### Finding the first anchor tag with an href attribute that starts with '/apple-' and Combine it with base url

In [8]:
# Define the base URL for Jumia Nigeria
base_link = "https://www.jumia.com.ng"

# Find the first anchor tag with an href attribute that starts with '/apple-'
link = Article.find('a', href=lambda x: x and x.startswith('/apple-'))

# Combine the base URL with the relative path to create a complete URL
full_link = base_link + link['href']

# Print the complete URL
print(full_link)

https://www.jumia.com.ng/apple-iphone-14-pro-max-6.7-256gb-rom-6gb-ram-nano-sim-purple-321406095.html


### Looping through pages 1 to 16 of search results to get all the phone information

In [9]:
# Initialize empty list to store phone data
Phone= []

# Loop through pages 1 to 16 of search results
for page in range(1, 17):
    # Make HTTP request to Jumia website for each page of iPhone search results
    source = requests.get(f"https://www.jumia.com.ng/catalog/?q=iphone&page={page}").text
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(source, 'lxml')

    # Find all product articles on the page
    Articles = soup.find_all('article', class_="prd _fb col c-prd")
    # Print progress information
    print(f"Scraping page {page}... found {len(Articles)} items")

    # Loop through each product article
    for Article in Articles:
        # Extract the phone name, removing extra whitespace
        name = Article.find('h3', class_="name").get_text(strip=True)
        # Extract the price, removing spaces and extra whitespace
        price = Article.find('div', class_="prc").get_text().replace(' ', '').strip()

        # Add the extracted data as a dictionary to the Phone list
        Phone.append({
            'Phone_info': name,
            'Phone_Price': price
        })

Scraping page 1... found 40 items
Scraping page 2... found 40 items
Scraping page 3... found 40 items
Scraping page 4... found 40 items
Scraping page 5... found 40 items
Scraping page 6... found 40 items
Scraping page 7... found 40 items
Scraping page 8... found 40 items
Scraping page 9... found 40 items
Scraping page 10... found 40 items
Scraping page 11... found 40 items
Scraping page 12... found 40 items
Scraping page 13... found 40 items
Scraping page 14... found 40 items
Scraping page 15... found 40 items
Scraping page 16... found 40 items


### Printing the length of the Phone scraped

In [10]:
# Print the length of the Phone variable 
#print(len(Phone))

### Creating a DataFrame from the 'Phone' data structure

In [11]:
# This converts the data into a tabular format for easier manipulation and analysis
#df = pd.DataFrame(Phone)

### Saving the DataFrame 'df' to a CSV file

In [12]:
# Save the DataFrame 'df' to a CSV file named 'jumia_iphones.csv' without including the index column
#df.to_csv("jumia_iphones.csv", index=False)

### Reading the saved csv file into a DataFrame

In [13]:
# Read the iPhone data from the CSV file 'jumia_iphones.csv' into a pandas DataFrame
DataFrame = pd.read_csv("jumia_iphones.csv")

### Displaying the first 5 rows

In [14]:
# Display the first 5 rows of the DataFrame
# For quick inspection of  the data structure and content
DataFrame.head(5)

Unnamed: 0,Phone_info,Phone_Price
0,Apple IPhone 11 6.1-Inch Liquid Retina LCD (4G...,"₦500,000"
1,Apple IPhone 12 Pro Max - 6.7-Inch - 128GB ROM...,"₦690,000"
2,Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine,"₦1,899,000"
3,PHILLY 50W Iphone Type-c To Lighting Fast Ch...,"₦3,912"
4,IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...,"₦65,906"


In [15]:
# Convert the DataFrame's column names to a list and print the string representation
# repr() shows the exact string representation including quotes and escape characters
print(repr(DataFrame.columns.tolist()))

['Phone_info', 'Phone_Price']


### defining iPhone models in chronological order from iPhone 3G to recent models

In [16]:
# List of iPhone models in chronological order from iPhone 3G to recent models
# This includes all main iPhone releases, including standard, Plus, Pro, Pro Max, mini, SE, and Air variants
# The list spans from early models (3G, 4, 5 series) through current models (16 and 17 series)
models = [
    'iPhone 3G', 'iPhone 3GS', 'iPhone 4', 'iPhone 4s', 'iPhone 5', 
    'iPhone 5c', 'iPhone 5s', 'iPhone 6', 'iPhone 6 Plus', 'iPhone 6s', 
    'iPhone 6s Plus', 'iPhone SE', 'iPhone 7', 'iPhone 7 Plus', 'iPhone 8', 
    'iPhone 8 Plus', 'iPhone X', 'iPhone XS', 'iPhone XS Max', 'iPhone XR', 
    'iPhone 11', 'iPhone 11 Pro', 'iPhone 11 Pro Max', 'iPhone 12', 
    'iPhone 12 mini', 'iPhone 12 Pro', 'iPhone 12 Pro Max', 'iPhone 13', 
    'iPhone 13 mini', 'iPhone 13 Pro', 'iPhone 13 Pro Max', 'iPhone 14', 
    'iPhone 14 Plus', 'iPhone 14 Pro', 'iPhone 14 Pro Max', 'iPhone 15', 
    'iPhone 15 Plus', 'iPhone 15 Pro', 'iPhone 15 Pro Max', 'iPhone 16', 
    'iPhone 16 Plus', 'iPhone 16 Pro', 'iPhone 16 Pro Max', 'iPhone 16e', 
    'iPhone 17', 'iPhone Air', 'iPhone 17 Pro', 'iPhone 17 Pro Max'
]

### Sorting the models

In [17]:
# Sort the 'models' list by length of each element in descending order (longest first)
sorted_models = sorted(models, key=len, reverse=True)

In [18]:
# Create an empty list to store the identified phone series
series_list = []
# Iterate through each phone information entry in the DataFrame
for name in DataFrame['Phone_info']:
    # Check if the phone information is missing (NaN)
    if pd.isna(name):
        # If missing, label as 'Unknown'
        series_list.append('Unknown')
    else:
        # Convert phone information to lowercase for case-insensitive matching
        name_lower = str(name).lower()
        # Default value if no match is found
        found = 'Unknown'
        # Iterate through the sorted list of phone models
        for model in sorted_models:
            # Check if the current model appears in the phone information
            if model.lower() in name_lower:
                # If found, assign the model name and stop searching
                found = model
                break
        # Add the identified model to the list
        series_list.append(found)

# Create a new column 'Series' in the DataFrame with the identified phone models
DataFrame['Series'] = series_list

### Finding duplicate rows in DataFrame based on 'Phone_info', 'Series', and 'Phone_Price' 

In [19]:
# Find duplicate rows in DataFrame based on 'Phone_info', 'Series', and 'Phone_Price' columns
# keep=False ensures all duplicates are marked (not just first or last occurrences)
duplicates = DataFrame[DataFrame.duplicated(subset=['Phone_info', 'Series', 'Phone_Price'], keep=False)]
print(duplicates)  # Display all identified duplicate rows

                                            Phone_info Phone_Price  \
0    Apple IPhone 11 6.1-Inch Liquid Retina LCD (4G...    ₦500,000   
1    Apple IPhone 12 Pro Max - 6.7-Inch - 128GB ROM...    ₦690,000   
2       Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine  ₦1,899,000   
4    IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...     ₦65,906   
16   IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...     ₦65,906   
..                                                 ...         ...   
583  Otg 64GB Ultra OTG Micro USB Type-c Lightning ...     ₦14,900   
586  K9 Dual Wireless Lapel Microphone Lavalier Mic...     ₦11,300   
595    Pro 5s EarPod For IOS(iPhone) & Android Devices      ₦6,000   
613  Apple IPhone 16 Pro Max - 8GB - 512GB - Single...  ₦2,150,000   
633  Apple iPhone 16 Pro Max - 8GB - 256GB -5G - De...  ₦1,920,999   

                Series  
0            iPhone 11  
1    iPhone 12 Pro Max  
2            iPhone 16  
4             iPhone 6  
16            iPhone 6  
..       

### Removing duplicates

In [20]:
# Remove duplicate rows from DataFrame based on 'Series' and 'Phone_Price' columns
# This keeps only the first occurrence of each unique combination
df_clean = DataFrame.drop_duplicates(subset=['Series', 'Phone_Price'])

### Checking the rows doesn't match the defined iphone model

In [21]:
# Filter the dataframe to only include rows where 'Series' column equals 'Unknown'
# and store the result in a new dataframe called 'Unknown'
Unknown = df_clean[df_clean['Series'] == 'Unknown']
# Display the contents of the 'Unknown' dataframe
print(Unknown)

                                            Phone_info Phone_Price   Series
3    PHILLY 50W Iphone Type-c To Lighting  Fast  Ch...      ₦3,912  Unknown
10   K9 Double Wireless Lapel Microphone Tie Mic TY...      ₦9,197  Unknown
11   Holder Portable Phone Holder Mobile IPhone, Sm...      ₦2,700  Unknown
15   Ace-Type TypeC Charger 120W Fast Android USB C...      ₦5,529  Unknown
19   K9 Dual Wireless Lapel Microphone Lavalier Tie...     ₦10,643  Unknown
..                                                 ...         ...      ...
627  Realme earbuds NEWAGE Wireless Earphones Bluet...     ₦16,765  Unknown
629  Oraimo Firefly 3 10W Fast IPhone Charger + Lig...      ₦7,495  Unknown
634  DUAL Type-c Wireless Lapel Microphone For IPho...     ₦13,980  Unknown
635  X2 Newest Long Battery Life Simple And Compact...     ₦18,523  Unknown
638  Flick 50000 MAH Large Capacity Full For IPhone...     ₦36,000  Unknown

[210 rows x 3 columns]


### Display all unknown entries for futher inspection before dropping it

In [22]:
# Temporarily set pandas to display all rows
pd.set_option('display.max_rows', None)

# Display the entire Unknown DataFrame
print(Unknown[['Phone_info', 'Phone_Price', 'Series']])

# Optional: reset the option back to default to avoid huge outputs later
pd.reset_option('display.max_rows')

                                            Phone_info      Phone_Price  \
3    PHILLY 50W Iphone Type-c To Lighting  Fast  Ch...           ₦3,912   
10   K9 Double Wireless Lapel Microphone Tie Mic TY...           ₦9,197   
11   Holder Portable Phone Holder Mobile IPhone, Sm...           ₦2,700   
15   Ace-Type TypeC Charger 120W Fast Android USB C...           ₦5,529   
19   K9 Dual Wireless Lapel Microphone Lavalier Tie...          ₦10,643   
23   K9 Double Wireless Lapel Microphone Tie Mic TY...           ₦9,454   
31   HEAVY DUTY 120W SMART IPHONE USB-C To LIGHTING...           ₦4,940   
34   Holder Portable Phone Holder Mobile IPhone, Sm...           ₦3,267   
35                    Apple iPhone 16 (256 GB) – Black       ₦1,370,446   
36   TiLECC Wireless Lapel Microphone Type-c For IP...          ₦11,200   
37   Double Wireless Microphone TYPE-C Android Phon...           ₦9,200   
48   Hollyland Lark M1 Wireless Lavalier Microphone...          ₦47,081   
61   Wireless Music Wirel

### chehcking the rows of the cleaned DataFrame

In [23]:
# Get the number of rows in the cleaned DataFrame
len(df_clean)

436

In [24]:
# Randomly display 10 rows from the cleaned DataFrame
df_clean.sample(10)

Unnamed: 0,Phone_info,Phone_Price,Series
383,Dual Wireless Microphone For IPhone & Type-C +...,"₦11,250",Unknown
194,Apple iPhone 17 Pro Max 5G 6.9'' 12GB RAM 2TB ...,"₦5,250,000",iPhone 17 Pro Max
163,IPhone 15/ 15plus/15 Pro/15pro Max/16/16plus/1...,"₦6,999",iPhone 15
631,Iphone 17 pro max transparent shockproof back ...,"₦3,500",iPhone 17 Pro Max
317,"15W Wireless Fast Charger For Android, Samsung...","₦8,950",Unknown
151,Asni For Men Waterproof Swimming Leather Origi...,"₦20,900",Unknown
626,Camera Lens Shield Protector For IPhone 11/12/...,"₦2,890",iPhone 11
147,Wireless Stereo Earphone Wireless Earpiece 5 3...,"₦7,250",Unknown
606,"100WD Liquid Silicone Case For IPhone 13, 12, ...","₦5,000-₦5,800",iPhone 13
74,IPhone 6 99.9%NEW 1GB RAM+32GB ROM 1810mAh Sin...,"₦68,898",iPhone 6


### Inspecting iPhone 16 Pro Max

In [25]:
# Filter the dataset to include only rows where the Series column equals "iPhone 16 Pro Max"
iphone_16_pro_max = df_clean[df_clean['Series'] == 'iPhone 16 Pro Max']

# Print the filtered DataFrame to view the records for iPhone 16 Pro Max
print(iphone_16_pro_max)

                                            Phone_info Phone_Price  \
8    Apple IPhone 16 Pro Max  - 8GB - 256GB -5G - B...  ₦2,299,000   
20   Apple IPhone 16 Pro Max  - 8GB - 512GB -5G - N...  ₦2,750,000   
46   Apple IPhone 16 Pro Max - 8GB - 512GB - Single...  ₦2,150,000   
82   Apple iPhone 16 Pro Max (256 GB) – Black Titanium  ₦1,953,407   
87   Apple iPhone 16 Pro Max - 8GB - 256GB -5G - De...  ₦1,920,999   
307  Designer IPhone 16 Pro Max Case - Fashionable ...     ₦17,000   
418       Xundd Transparent Case For Iphone 16 Pro Max     ₦14,000   
517  IPhone 16 Pro Max Silicone Phone Case & Pouch ...      ₦4,741   
529  IPhone 16 Pro Max /16 Pro/16 /16 Plus Full Scr...      ₦4,500   
567  Rugged Case With Built-in Screen Guard For IPh...     ₦15,000   
600  Designer IPhone 16 Pro Max Case - Fashionable ...     ₦17,500   
609  Iphone 16 Pro Max Jelly Liquid Silicone Phone ...      ₦9,000   

                Series  
8    iPhone 16 Pro Max  
20   iPhone 16 Pro Max  
46   iPhone 16

### Inspecting iPhone 16

In [26]:
# Filter the dataframe to get only rows where the Series is "iPhone 16"
iphone_16 = df_clean[df_clean['Series'] == 'iPhone 16']

# Display the filtered result
print(iphone_16)

                                            Phone_info    Phone_Price  \
2       Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine     ₦1,899,000   
172  Type Z FOR Charger USB Type C To Type C IPhone...         ₦4,995   
253  For IPhone 16 Silicone Fine Pore Phone Case(La...         ₦5,300   
278  Type Z FOR Charger USB Type C To Type C IPhone...         ₦4,900   
337  IPhone 16 High Quality Protective Transparent ...         ₦2,700   
345                      Iphone 16 white silicone case         ₦4,000   
364  MagSafe Magnetic Wireless PowerBank 5,000mAh -...        ₦16,141   
387  Dual Type C Ports For Fast Charging For IPhone...         ₦5,900   
445  Dual Type C Ports for Fast charging For iPhone...  ₦5,550-₦5,700   
453  65W  Type C To Type C Charger For Iphone 16, 1...         ₦6,500   
456  Quality IPHONE 16 Magnetic Magsafe Transparent...         ₦8,500   
506  iPhone 16 Camera Lens Protector - Camera Prote...         ₦3,791   
591  Iphone 16/16 Plus/16 Pro/16 Pro Max Quality So

### Create a new DataFrame by removing rows where the Series column has the value "Unknown"

In [27]:
# Create a new DataFrame by removing rows where the Series column has the value "Unknown"
# reset_index(drop=True) resets the index after filtering so it starts from 0 again
df1 = df_clean[df_clean['Series'] != 'Unknown'].reset_index(drop=True)

# Display the cleaned DataFrame
df1

Unnamed: 0,Phone_info,Phone_Price,Series
0,Apple IPhone 11 6.1-Inch Liquid Retina LCD (4G...,"₦500,000",iPhone 11
1,Apple IPhone 12 Pro Max - 6.7-Inch - 128GB ROM...,"₦690,000",iPhone 12 Pro Max
2,Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine,"₦1,899,000",iPhone 16
3,IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...,"₦65,906",iPhone 6
4,Apple IPhone 6s Plus 99.9%NEW 2GB RAM+32GB ROM...,"₦88,907",iPhone 6s Plus
...,...,...,...
221,Iphone 16 / 16 Plus Magsafe UltraSlim Hard Ba...,"₦7,999",iPhone 16
222,Iphone 17 pro max transparent shockproof back ...,"₦3,500",iPhone 17 Pro Max
223,Fast iPhone 15 pro max PD Car Mp3 Charger Blue...,"₦6,900",iPhone 15 Pro Max
224,IPhone 15 Pro Max Direct Earpiece,"₦7,500",iPhone 15 Pro Max


In [28]:
# Get unique values from the 'Series' column in df1
# Sort them alphabetically to view in an organized way
print(sorted(df1['Series'].unique()))

['iPhone 11', 'iPhone 11 Pro Max', 'iPhone 12', 'iPhone 12 Pro', 'iPhone 12 Pro Max', 'iPhone 12 mini', 'iPhone 13', 'iPhone 13 Pro', 'iPhone 13 Pro Max', 'iPhone 14', 'iPhone 14 Plus', 'iPhone 14 Pro', 'iPhone 14 Pro Max', 'iPhone 15', 'iPhone 15 Plus', 'iPhone 15 Pro', 'iPhone 15 Pro Max', 'iPhone 16', 'iPhone 16 Plus', 'iPhone 16 Pro', 'iPhone 16 Pro Max', 'iPhone 17', 'iPhone 17 Pro', 'iPhone 17 Pro Max', 'iPhone 3GS', 'iPhone 6', 'iPhone 6 Plus', 'iPhone 6s Plus', 'iPhone 7', 'iPhone 7 Plus', 'iPhone SE', 'iPhone X', 'iPhone XR', 'iPhone XS', 'iPhone XS Max']


### Inpecting iphone 3GS

In [29]:
# Filter df1 to include only rows where the Series is "iPhone 3GS"
iPhone_3GS = df1[df1['Series'] == 'iPhone 3GS']

# Display the filtered DataFrame to see all records for iPhone 3GS
print(iPhone_3GS)


                                            Phone_info Phone_Price      Series
121  2PCS 30 Pin Data Sync Cable, For IPhone 4 & 4S...      ₦5,000  iPhone 3GS


### Inpecting iphone 6

In [30]:
# Filter df1 to include only rows where the Series is "iPhone 6"
iPhone_6 = df1[df1['Series'] == 'iPhone 6']

# Display the filtered DataFrame to see all records for iPhone 6
print(iPhone_6)

                                            Phone_info    Phone_Price  \
3    IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...        ₦65,906   
14   Iphone 6 - 4.7" - 1/64GB - 8Mpx - 4G (Recondit...        ₦77,500   
22   Apple iPhone 6 Renewed 99.9%New 32G Unlocked F...        ₦73,305   
39   IPhone 6 99.9%NEW 1GB RAM+32GB ROM 1810mAh Sin...        ₦68,898   
43   Apple IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810m...        ₦57,949   
102  4 in 1 USB C iPhone 60W Fast Charging Cable Br...  ₦2,999-₦5,887   
108  iPhone Crystal Butterfly Star Soft Case For iP...  ₦4,999-₦5,599   
207  Transparent 360 Degree Touch Case Front And Ba...         ₦2,650   

       Series  
3    iPhone 6  
14   iPhone 6  
22   iPhone 6  
39   iPhone 6  
43   iPhone 6  
102  iPhone 6  
108  iPhone 6  
207  iPhone 6  


### Inpecting iPhone 6 plus

In [31]:
# Filter df1 to include only rows where the Series is "iPhone 6 Plus"
iPhone_6_plus = df1[df1['Series'] == 'iPhone 6 Plus']

# Display the filtered DataFrame to see all records for iPhone 6 Plus
print(iPhone_6_plus)

                                            Phone_info Phone_Price  \
90   Classy IPhone 6 Plus /6s Plus Silicone Back Ca...      ₦3,000   
117  Apple IPhone 6 Plus 6s Plus Case Airbag Cover ...     ₦26,500   
136  Apple IPhone 6 Plus 6s Plus Case Airbag Cover ...     ₦22,227   
153  Solid IPhone 6 Plus /6s Plus Silicone Back Cas...      ₦2,000   

            Series  
90   iPhone 6 Plus  
117  iPhone 6 Plus  
136  iPhone 6 Plus  
153  iPhone 6 Plus  


### Inpecting iphone 6s plus

In [32]:
# Filter df1 to include only rows where the Series is "iPhone 6s Plus"
iPhone_6s_plus = df1[df1['Series'] == 'iPhone 6s Plus']

# Display the filtered DataFrame to see all records for iPhone 6s Plus
print(iPhone_6s_plus)

                                           Phone_info Phone_Price  \
4   Apple IPhone 6s Plus 99.9%NEW 2GB RAM+32GB ROM...     ₦88,907   
15  Apple IPhone 6s Plus 99.9%NEW 2GB RAM+64GB ROM...     ₦99,003   

            Series  
4   iPhone 6s Plus  
15  iPhone 6s Plus  


### Inpecting iphone SE

In [33]:
# Filter df1 to include only rows where the Series is "iPhone SE"
iPhone_SE = df1[df1['Series'] == 'iPhone SE']

# Display the filtered DataFrame to see all records for iPhone SE
print(iPhone_SE)

                                           Phone_info    Phone_Price  \
81  iPhone Series Solid Transparent Case Pouch - M...  ₦3,350-₦6,000   

       Series  
81  iPhone SE  


### Removing currency symbol, commas and any leading or trailing whitespace

In [34]:
# Remove the currency symbol (₦) and commas from the 'Phone_Price' column
# Also remove any leading or trailing whitespace
df1['Phone_Price'] = df1['Phone_Price'].str.replace('[₦,]', '', regex=True).str.strip()

# Display the updated DataFrame
df1

Unnamed: 0,Phone_info,Phone_Price,Series
0,Apple IPhone 11 6.1-Inch Liquid Retina LCD (4G...,500000,iPhone 11
1,Apple IPhone 12 Pro Max - 6.7-Inch - 128GB ROM...,690000,iPhone 12 Pro Max
2,Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine,1899000,iPhone 16
3,IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...,65906,iPhone 6
4,Apple IPhone 6s Plus 99.9%NEW 2GB RAM+32GB ROM...,88907,iPhone 6s Plus
...,...,...,...
221,Iphone 16 / 16 Plus Magsafe UltraSlim Hard Ba...,7999,iPhone 16
222,Iphone 17 pro max transparent shockproof back ...,3500,iPhone 17 Pro Max
223,Fast iPhone 15 pro max PD Car Mp3 Charger Blue...,6900,iPhone 15 Pro Max
224,IPhone 15 Pro Max Direct Earpiece,7500,iPhone 15 Pro Max


### Converting the 'Phone_Price' column to numeric values

In [35]:
# Convert the 'Phone_Price' column to numeric values
# If the price is a range (e.g., "1000-1500"), take the average of the two values
# Otherwise, convert the single price string to a float
df1['Phone_Price'] = df1['Phone_Price'].apply(
    lambda x: np.mean([float(p) for p in x.split('-')]) if '-' in x else float(x)
)

In [36]:
# Filter df1 to include only rows where the Series is "iPhone 6" after cleaning the Phone_Price column
iPhone_6 = df1[df1['Series'] == 'iPhone 6']

# Display the filtered DataFrame to see all iPhone 6 records with numeric prices
print(iPhone_6)

                                            Phone_info  Phone_Price    Series
3    IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...      65906.0  iPhone 6
14   Iphone 6 - 4.7" - 1/64GB - 8Mpx - 4G (Recondit...      77500.0  iPhone 6
22   Apple iPhone 6 Renewed 99.9%New 32G Unlocked F...      73305.0  iPhone 6
39   IPhone 6 99.9%NEW 1GB RAM+32GB ROM 1810mAh Sin...      68898.0  iPhone 6
43   Apple IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810m...      57949.0  iPhone 6
102  4 in 1 USB C iPhone 60W Fast Charging Cable Br...       4443.0  iPhone 6
108  iPhone Crystal Butterfly Star Soft Case For iP...       5299.0  iPhone 6
207  Transparent 360 Degree Touch Case Front And Ba...       2650.0  iPhone 6


### After removing rows and columns labeled as "Unknown," I noticed that some phone accessories were still classified under defined iPhone models. Upon inspection, I found that none of these accessories had prices close to the lowest-priced iPhone in the dataset. To address this, I identified the price of the cheapest iPhone model and removed any items priced below that threshold, ensuring the dataset contains only actual phone entries.

In [37]:
# Remove rows where the Phone_Price is below 57,949 (because it's the lowest iPhone price)
# Reset the index after filtering to start from 0 again
df1 = df1[df1['Phone_Price'] >= 57949.0].reset_index(drop=True)

# Display the updated DataFrame
df1

Unnamed: 0,Phone_info,Phone_Price,Series
0,Apple IPhone 11 6.1-Inch Liquid Retina LCD (4G...,500000.0,iPhone 11
1,Apple IPhone 12 Pro Max - 6.7-Inch - 128GB ROM...,690000.0,iPhone 12 Pro Max
2,Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine,1899000.0,iPhone 16
3,IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...,65906.0,iPhone 6
4,Apple IPhone 6s Plus 99.9%NEW 2GB RAM+32GB ROM...,88907.0,iPhone 6s Plus
5,"Apple IPhone 11 Pro Max 6.5-Inch (4GB RAM, 256...",615000.0,iPhone 11 Pro Max
6,"Apple IPhone 15 Pro Max 6.7"" 512GB Nano-SIM 5G...",1900000.0,iPhone 15 Pro Max
7,Apple IPhone 16 Pro Max - 8GB - 256GB -5G - B...,2299000.0,iPhone 16 Pro Max
8,"Apple IPhone 14 Pro Max 6.7"" (256GB ROM + 6GB ...",1450000.0,iPhone 14 Pro Max
9,"Apple IPhone 16 Plus 5G - 6.7"" - 128GB ROM - 8...",1650000.0,iPhone 16 Plus


In [38]:
# Get the number of rows in the updated DataFrame after filtering out low-priced entries
len(df1)

51

In [39]:
# Filter df1 to include only rows where the Series is "iPhone 6" after removing low-priced entries
iPhone_6 = df1[df1['Series'] == 'iPhone 6']

# Display the filtered DataFrame to verify iPhone 6 records remain
print(iPhone_6)

                                           Phone_info  Phone_Price    Series
3   IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...      65906.0  iPhone 6
14  Iphone 6 - 4.7" - 1/64GB - 8Mpx - 4G (Recondit...      77500.0  iPhone 6
22  Apple iPhone 6 Renewed 99.9%New 32G Unlocked F...      73305.0  iPhone 6
37  IPhone 6 99.9%NEW 1GB RAM+32GB ROM 1810mAh Sin...      68898.0  iPhone 6
40  Apple IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810m...      57949.0  iPhone 6


In [40]:
# Filter df1 to include only rows where the Series is "iPhone 16" after cleaning and price filtering
iphone_16 = df1[df1['Series'] == 'iPhone 16']

# Display the filtered DataFrame to see all iPhone 16 records
print(iphone_16)

                                       Phone_info  Phone_Price     Series
2  Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine    1899000.0  iPhone 16


In [41]:
# Display the first 5 rows of the updated DataFrame to get a quick overview
df1.head(5)

Unnamed: 0,Phone_info,Phone_Price,Series
0,Apple IPhone 11 6.1-Inch Liquid Retina LCD (4G...,500000.0,iPhone 11
1,Apple IPhone 12 Pro Max - 6.7-Inch - 128GB ROM...,690000.0,iPhone 12 Pro Max
2,Apple IPhone 16 - 8GB-256GB - 5G - Ultramarine,1899000.0,iPhone 16
3,IPhone 6 99.9%NEW 1GB RAM+16GB ROM 1810mAh Sin...,65906.0,iPhone 6
4,Apple IPhone 6s Plus 99.9%NEW 2GB RAM+32GB ROM...,88907.0,iPhone 6s Plus


### Saving the cleaned DataFrame to a CSV file

In [212]:
# Save the cleaned and filtered DataFrame to a CSV file named 'cleaned_jumia_iphone.csv'
# index=False ensures that the row indices are not written to the file
#df1.to_csv('cleaned_jumia_iphone.csv', index=False)