In [4]:
# import the necessary Python libraries.
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [5]:
# Read the Apple product listing CSV into a DataFrame and preview the first rows.
data = pd.read_csv(filepath_or_buffer="apple_products.csv")
print(data.head(n=5))

                               Product Name  \
0         APPLE iPhone 8 Plus (Gold, 64 GB)   
1  APPLE iPhone 8 Plus (Space Grey, 256 GB)   
2      APPLE iPhone 8 Plus (Silver, 256 GB)   
3           APPLE iPhone 8 (Silver, 256 GB)   
4             APPLE iPhone 8 (Gold, 256 GB)   

                                         Product URL  Brand  Sale Price  \
0  https://www.flipkart.com/apple-iphone-8-plus-g...  Apple       49900   
1  https://www.flipkart.com/apple-iphone-8-plus-s...  Apple       84900   
2  https://www.flipkart.com/apple-iphone-8-plus-s...  Apple       84900   
3  https://www.flipkart.com/apple-iphone-8-silver...  Apple       77000   
4  https://www.flipkart.com/apple-iphone-8-gold-2...  Apple       77000   

     Mrp  Discount Percentage  Number Of Ratings  Number Of Reviews  \
0  49900                    0               3431                356   
1  84900                    0               3431                356   
2  84900                    0               3431     

In [6]:
# Count missing values per column; a column of zeros means nothing needs imputing.
print(data.isna().sum())

Product Name           0
Product URL            0
Brand                  0
Sale Price             0
Mrp                    0
Discount Percentage    0
Number Of Ratings      0
Number Of Reviews      0
Upc                    0
Star Rating            0
Ram                    0
dtype: int64


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
summary_stats = data.describe()
print(summary_stats)

          Sale Price            Mrp  Discount Percentage  Number Of Ratings  \
count      62.000000      62.000000            62.000000          62.000000   
mean    80073.887097   88058.064516             9.951613       22420.403226   
std     34310.446132   34728.825597             7.608079       33768.589550   
min     29999.000000   39900.000000             0.000000         542.000000   
25%     49900.000000   54900.000000             6.000000         740.000000   
50%     75900.000000   79900.000000            10.000000        2101.000000   
75%    117100.000000  120950.000000            14.000000       43470.000000   
max    140900.000000  149900.000000            29.000000       95909.000000   

       Number Of Reviews  Star Rating  
count          62.000000    62.000000  
mean         1861.677419     4.575806  
std          2855.883830     0.059190  
min            42.000000     4.500000  
25%            64.000000     4.500000  
50%           180.000000     4.600000  
75%     

In [8]:
# Select the 10 iPhones with the highest star rating.
# NOTE(review): Star Rating only spans a narrow range, so many rows tie; which of
# the tied rows make the cut depends on the sort algorithm — consider
# kind="stable" if a reproducible selection matters.
highest_rated = data.sort_values(by="Star Rating", ascending=False).head(10)

print(highest_rated["Product Name"])

20     APPLE iPhone 11 Pro Max (Midnight Green, 64 GB)
17         APPLE iPhone 11 Pro Max (Space Grey, 64 GB)
16    APPLE iPhone 11 Pro Max (Midnight Green, 256 GB)
15               APPLE iPhone 11 Pro Max (Gold, 64 GB)
14              APPLE iPhone 11 Pro Max (Gold, 256 GB)
0                    APPLE iPhone 8 Plus (Gold, 64 GB)
29                     APPLE iPhone 12 (White, 128 GB)
32          APPLE iPhone 12 Pro Max (Graphite, 128 GB)
35                     APPLE iPhone 12 (Black, 128 GB)
36                      APPLE iPhone 12 (Blue, 128 GB)
Name: Product Name, dtype: object


In [9]:
# Number of ratings for each of the highest-rated iPhones.
# Both axes are taken from the same rows of `highest_rated` by column name, so
# each bar's height is guaranteed to belong to the product shown beneath it.
# (Deriving the labels from value_counts().index reorders and de-duplicates the
# names independently of the y values, which can mislabel bars or fail with a
# length mismatch when a product name repeats.)
figure = px.bar(highest_rated,
                x="Product Name",
                y="Number Of Ratings",
                title="Number of Ratings of Highest Rated iPhones")
figure.show()

In [10]:
# Number of reviews for each of the highest-rated iPhones.
# Both axes are taken from the same rows of `highest_rated` by column name, so
# each bar's height is guaranteed to belong to the product shown beneath it.
# (Deriving the labels from value_counts().index reorders and de-duplicates the
# names independently of the y values, which can mislabel bars or fail with a
# length mismatch when a product name repeats.)
figure = px.bar(highest_rated,
                x="Product Name",
                y="Number Of Reviews",
                title="Number of Reviews of Highest Rated iPhones")
figure.show()

In [11]:
# Sale price plotted against the number of ratings each iPhone received.
# Marker size encodes the discount percentage; an OLS trendline is overlaid.
figure = px.scatter(
    data,
    x="Number Of Ratings",
    y="Sale Price",
    size="Discount Percentage",
    trendline="ols",
    title="Relationship between Sale Price and Number of Ratings of iPhones",
)
figure.show()

In [12]:
# Discount percentage plotted against the number of ratings.
# Marker size encodes the sale price; an OLS trendline is overlaid.
figure = px.scatter(
    data,
    x="Number Of Ratings",
    y="Discount Percentage",
    size="Sale Price",
    trendline="ols",
    title="Relationship between Discount Percentage and Number of Ratings of iPhones",
)
figure.show()

In [14]:
# Absolute discount (MRP minus sale price) plotted against the number of ratings.
# The new "difference" column is stored on `data` itself.
data["difference"] = data["Mrp"].sub(data["Sale Price"])

# Marker size encodes the sale price; an OLS trendline is overlaid.
figure = px.scatter(
    data,
    x="Number Of Ratings",
    y="difference",
    size="Sale Price",
    trendline="ols",
    title="Relationship between Number Of Ratings and difference of Mrp and Sale Price of iPhones",
)
figure.show()

In [15]:
# Compare Sale Price and MRP against the number of ratings in one chart.
# The frame is reshaped to long form (one row per product and price type) so the
# two price types can be drawn as separately colored series, then ordered by
# number of ratings.
df_melted = pd.melt(
    data,
    id_vars="Number Of Ratings",
    value_vars=["Sale Price", "Mrp"],
    var_name="Price Type",
    value_name="Price",
).sort_values(by="Number Of Ratings")

fig = px.scatter(
    df_melted,
    x="Number Of Ratings",
    y="Price",
    color="Price Type",
    trendline="ols",
    title="Relationship between Number Of Ratings and Sale Price/MRP of iPhones",
)
fig.show()

In [16]:
# Sale price plotted against the number of ratings, with points colored by the
# "Ram" column and an OLS trendline overlaid.
fig = px.scatter(
    data,
    x="Number Of Ratings",
    y="Sale Price",
    color="Ram",
    trendline="ols",
    title="Relationship between Number Of Ratings and Sale Price of iPhones",
)
fig.show()

In [17]:
# Total the numeric columns per 'Ram' value.
# numeric_only=True keeps the aggregation to numeric columns; without it the
# text columns (product name, URL, brand, ...) are also "summed", which
# concatenates strings on recent pandas and triggers deprecation warnings on
# older releases.
df_grouped = data.groupby('Ram', as_index=False).sum(numeric_only=True)

# Each slice is the summed number of ratings for one RAM size.
# NOTE(review): the title calls this a sales distribution, but the values are
# rating counts — presumably used as a proxy for sales; confirm the wording.
fig = px.pie(data_frame=df_grouped,
             names='Ram',
             values="Number Of Ratings",
             title='Sales Distribution of iPhones (Aggregated by Available RAM)')

fig.show()