This data was cleaned before (https://github.com/Fab1193/DataAnalysis/blob/main/Apple%20Product%20Prices%20from%2026%20Countries%20(Data%20Cleaning).ipynb)

In [1]:
import numpy as np
import pandas as pd
import re
import altair as alt
import requests
import warnings
warnings.filterwarnings('ignore')

**Read csv file**

In [2]:
datos = pd.read_csv('apple.csv')

In [3]:
datos.head(5)

Unnamed: 0.1,Unnamed: 0,pid,model,country,region,income_group,price_local_currency,code,price_usd
0,0,0,iPhone 13,Australia,East Asia & Pacific,High income,1199.0,AUD,867.67
1,1,1,iPhone SE,Australia,East Asia & Pacific,High income,679.0,AUD,491.37
2,2,2,iPhone 12,Australia,East Asia & Pacific,High income,999.0,AUD,722.94
3,3,3,AirPods(3rd generation),Australia,East Asia & Pacific,High income,279.0,AUD,201.9
4,4,4,AirPods Pro,Australia,East Asia & Pacific,High income,399.0,AUD,288.74


**Drop some columns that won't be used**

In [4]:
df = datos.iloc[:,1:]
df = datos.drop(['pid', 'price_local_currency','code'], axis = 1)
df.head(5)

Unnamed: 0.1,Unnamed: 0,model,country,region,income_group,price_usd
0,0,iPhone 13,Australia,East Asia & Pacific,High income,867.67
1,1,iPhone SE,Australia,East Asia & Pacific,High income,491.37
2,2,iPhone 12,Australia,East Asia & Pacific,High income,722.94
3,3,AirPods(3rd generation),Australia,East Asia & Pacific,High income,201.9
4,4,AirPods Pro,Australia,East Asia & Pacific,High income,288.74


In [5]:
df.describe()

Unnamed: 0.1,Unnamed: 0,price_usd
count,443.0,443.0
mean,221.0,526.057856
std,128.027341,470.676677
min,0.0,37.58
25%,110.5,180.54
50%,221.0,329.0
75%,331.5,790.135
max,442.0,3145.1


There are only 26 countries in the table

In [6]:
len(df.country.unique())

26

In [7]:
df.model.value_counts()

MacBook Air                      26
iPhone 12                        26
AirPods Pro                      26
AirPods(2nd generation)          26
Magic Mouse                      26
Apple TV 4K                      26
Apple Watch SE                   26
Apple Watch Series 3             26
iPhone SE                        26
iPad                             26
iPad Pro                         26
Apple Pencil (2nd generation)    26
24-inch iMac²                    26
Sport Band                       25
AirPods(3rd generation)          25
iPhone 13                        21
Apple TV HD                      17
AirPods Max                      11
iPhone 13 Pro                     5
Sportband                         1
Name: model, dtype: int64

Some products are not present in all countries. Sportband is misspelled, so it needs to be changed. Only products presented in all countries are going to be evaluated.

In [8]:
for i in range (0, len(df.model)):
    if "Sportband" in df.model[i]:
        df['model'][i] = 'Sport Band'
    else:
        df['model'][i] = df.model[i]

In [9]:
df = df[-df["model"].isin(['AirPods(3rd generation)', 'iPhone 13', 'iPhone 13 Pro', 
                           'Apple TV HD', 'Apple TV HD','AirPods Max'])]

In [10]:
df.model.value_counts()

iPhone SE                        26
iPhone 12                        26
AirPods Pro                      26
AirPods(2nd generation)          26
Apple TV 4K                      26
Apple Watch SE                   26
Apple Watch Series 3             26
Sport Band                       26
iPad                             26
iPad Pro                         26
Apple Pencil (2nd generation)    26
24-inch iMac²                    26
MacBook Air                      26
Magic Mouse                      26
Name: model, dtype: int64

In [11]:
df.dtypes

Unnamed: 0        int64
model            object
country          object
region           object
income_group     object
price_usd       float64
dtype: object

**Plot function**

In [12]:
class plot_data:
    def __init__ (self, data):
        self.__data = data
        
    @property
    def data (self):
        return self.__data
    
    @data.setter
    def data (self, newdata):
        self.__data = newdata    
    
    def density(self, x = 'variable', cat = 'model'):
        plot = alt.Chart(df).mark_area().encode(
            x = x + ':Q',
            y = 'density:Q',
            color = cat
        ).transform_density(
            density = x,
            as_=[x, 'density'],
            groupby=[cat],
        ).properties(width=300).facet(
            cat,
            columns = 2
        ).resolve_scale(
            y = 'independent',
            x = 'independent'
        ).transform_filter(
            'isValid(datum.' + cat + ')'
        )
        return plot 
 

    
    def line(self, x = 'variable1', y = 'variable2', z = 'variable3', title = '', subtitle = '', xt = '', 
            yt= '', lt = ''):
        input_dropdown = alt.binding_select(options = self.data[z].dropna().unique(), name = z +'   ')
        selector = alt.selection_single(fields=[z], bind=input_dropdown)
        plot_title = alt.TitleParams(title, subtitle=[subtitle])


        plot = alt.Chart(self.data, 
                         title = plot_title
                         ).mark_line().encode(
            x = alt.X(x, title = xt),
            y = alt.Y('mean('+ y + '):Q',
                      title = yt),
            color = alt.Color(z, legend = alt.Legend(title = lt)),
            opacity = alt.condition(selector, alt.value(1.0), alt.value(0.1)),
            tooltip = [x, z, y]
        ).add_selection(
            selector
        ).transform_filter(
            '(isValid(datum.' + z + '))'
        ).properties(width=550, height=550)


        plot = plot.configure(
          title = alt.TitleConfig(
            fontSize = 22,
            align = "center",
            color = "#364547"),
          axis = alt.AxisConfig(
            labelFontSize = 12,
            titleFontSize = 12,
            titleColor = "#364547"),
          legend = alt.LegendConfig(
            titleFontSize = 14,
            labelFontSize = 10,
            titleColor = "#364547"),
          axisX = alt.AxisConfig(labelAngle = -90),
          font = 'Roboto') 
        return plot
    
    def bar_product(self, x = 'variable1', y = 'variable2', z = 'variable3', 
                 article = 'article', xt = '', yt ='', zt = ''):

        input_dropdown = alt.binding_select(options=self.data[z].dropna().unique(), name= z + '   ')
        selector = alt.selection_single(fields=[z], bind=input_dropdown)


        plot = alt.Chart(self.data, 
                         title = article + " prices in 26 countries"
                         ).mark_bar().encode(
            x = alt.X(x +':Q', title = xt),
            y = alt.Y(y,
                      title=yt),
            color = alt.Color(z, legend = alt.Legend(title = zt)),
            opacity = alt.condition(selector, alt.value(1.0), alt.value(0.1)),
            tooltip = [z, y,
                       x + ':Q']
        ).add_selection(
            selector
        ).transform_filter(
            '(isValid(datum.'+ z +'))'
        ).properties(width=550, height=550)


        plot = plot.configure(
          title = alt.TitleConfig(
            fontSize = 22,
            align = "center",
            color = "#364547"),
          axis = alt.AxisConfig(
            labelFontSize = 13,
            titleFontSize = 13,
            titleColor = "#364547"),
          legend = alt.LegendConfig(
            titleFontSize = 14,
            labelFontSize = 13,
            titleColor = "#364547"),
          axisX = alt.AxisConfig(labelAngle = 0),
          font = 'Roboto') 

        return display(plot)

**Pricing Density by product**

In [13]:
f = plot_data(df)
f.density('price_usd', 'model')

Countries share similar prices for all products, but the chart shows that there is al least one country where people pays more for the products

**Average price in each country**

In [14]:
f = plot_data(df)
f.line('country', 'price_usd', 'model', title = 'Average Product Price', subtitle = 'By Product', xt = 'Country', 
       yt = 'Price', lt = 'Products')

For all the countries, the most expensive product is the 24-inch iMac², and the cheapest is the Sport Band. There is a peak in Brazil for all products, meaning that the price is higher. United States looks like the cheapest country to buy.

In [15]:
lis = df.model.unique()
modelsdict = {elem : pd.DataFrame for elem in lis}
for key in modelsdict.keys():
    modelsdict[key] = df[:][df.model == key]

**Pricing comparison by product**

In [16]:
v = 0
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* In USA iPhone SE costs \\$399, in Brazil \\$661.

In [17]:
v = 1
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* iPhone 12 costs more in Brazil, and Turkey (\\$1020).
* United States and Canada are the least expensive countries (\\$600).

In [18]:
v = 2
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

In [19]:
v = 3
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* The best price for the AirPods Pro and the AirPods(2nd generation) are in the US

In [20]:
v = 4
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* The Apple TV is just $1 cheaper in US against Canada.

In [21]:
v = 5
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

In [22]:
v = 6
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* Apple Watch Series 3 and SE are cheaper in USA

In [23]:
v = 7
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* In Turkey, the Sport Band costs $10 less than in the US.

In [24]:
v = 8
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

In [25]:
v = 9
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

- For the iPad and the rest of the products, prices in Europe are close to each other. Prices in Mexico are similar to the European ones.

In [26]:
v = 10
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

- The best price for Apple Pencil is located in Turkey.

In [27]:
v = 11
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* Thailand is the cheapest country to buy the 24-inch iMac (\\$1293.32). The cost in Brazil is 2.42 times the price in the United States. (\\$3145.1)

In [28]:
v = 12
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* MacBook Air is cheapest in Thailand.

In [29]:
v = 13
f = plot_data(data = list(modelsdict.values())[v])
f.bar_product(x = 'price_usd', y = 'country', z = 'income_group', article = list(modelsdict.keys())[v], 
       xt = 'Price in USD', yt = 'Country', zt = 'Income Group')

* Canada offers the best price for the Magic Mouse

# Conclusions

* Brazil is the most expensive country to buy an Apple product.
* Turkey is the cheapest country to buy the Apple Pencil, Sport Band.
* iMac and MacBook Air are cheaper in Thailand.
* United States is the best country to buy iPhone, Airpods, and iWatch.
* Most of the European countries offer similar prices.
* The prices in Mexico are similar to Europe.
* Canada is closes to the US in most of the articles, and offers the best price for the Magic Mouse.