This data was cleaned beforehand in a separate notebook (https://github.com/Fab1193/DataAnalysis/blob/main/Apple%20Product%20Prices%20from%2026%20Countries%20(Data%20Cleaning).ipynb)

In [None]:
import numpy as np
import pandas as pd
import geopandas as gp
import osmnx as ox
import re
import folium
import altair as alt
import requests
import warnings
import matplotlib.pyplot as plt
# Suppress every warning for the rest of the session; note that this also hides
# deprecation and pandas warnings that would otherwise flag problems.
warnings.filterwarnings('ignore')

**Read csv file**

In [None]:
# Load the previously cleaned price table; the path is relative to the working directory.
datos = pd.read_csv('apple.csv')

In [None]:
# Preview the first rows of the raw table (head() shows five rows by default).
datos.head()

**Drop some columns that won't be used**

In [None]:
# Build the working table by dropping the identifier, local-currency price and
# code columns, which the analysis below does not use.
# NOTE(review): every other column of `datos` is kept, including the first one;
# if that first column is a leftover CSV index it should be dropped explicitly — confirm.
df = datos.drop(columns=['pid', 'price_local_currency', 'code'])
df.head(5)

In [None]:
# Summary statistics of the working table (pandas describes numeric columns by default).
df.describe()

There are only 26 countries in the table

In [None]:
# Number of distinct values in the country column.
df["country"].unique().size

In [None]:
# How many rows (i.e. country listings) each product model has.
df["model"].value_counts()

Some products are not present in all countries. "Sportband" is misspelled, so it needs to be changed to "Sport Band". Only products present in all countries are going to be evaluated.

In [None]:
# Normalise the misspelled product name: every model whose text contains
# "Sportband" becomes exactly 'Sport Band'; all other rows are left untouched.
# A single .loc assignment is used instead of per-row chained indexing
# (df['model'][i] = ...), which pandas does not guarantee to write back to df.
# regex=False makes this a plain substring match; na=False skips missing names.
is_sportband = df["model"].str.contains("Sportband", regex=False, na=False)
df.loc[is_sportband, "model"] = 'Sport Band'

In [None]:
# Models excluded from the comparison (per the notebook narrative, only products
# available in every country are evaluated).
excluded_models = ['AirPods(3rd generation)', 'iPhone 13', 'iPhone 13 Pro',
                   'Apple TV HD', 'AirPods Max']
# `~` is the boolean-mask negation operator; unary minus on booleans is not portable.
df = df[~df["model"].isin(excluded_models)]

In [None]:
# Re-check the per-model row counts after the exclusion filter.
df["model"].value_counts()

In [None]:
# Column dtypes of the working table.
df.dtypes

**Importing shp file for geospatial chart**

In [None]:
# Read the world-countries shapefile with geopandas (path is relative to the working directory).
countries = gp.read_file("geo_data/countries/World_Countries__Generalized_.shp")   
countries.head()

In [None]:
# Keep only the shapefile rows whose COUNTRY name also appears in the price table.
priced_countries = df["country"].unique()
countries = countries.loc[countries["COUNTRY"].isin(priced_countries)]
countries.head()

In [None]:
# Attach each country's geometry to its price rows (shapefile COUNTRY == table country)
# and keep only the columns the map needs.
geo_columns = ['model', 'country', 'region', 'income_group', 'price_usd', 'geometry']
geo_df = countries.merge(df, left_on='COUNTRY', right_on='country')[geo_columns]
geo_df.head(5)

**Plot function**

In [None]:
class plot_data:
    """Bundle the notebook's Altair and Folium charts around a single dataset.

    The dataset passed to the constructor is stored privately and exposed
    through the ``data`` property; every plotting method reads from it.
    """

    def __init__(self, data):
        """Store ``data`` as the dataset used by all plotting methods."""
        self.__data = data

    @property
    def data(self):
        """The dataset currently wrapped by this instance."""
        return self.__data

    @data.setter
    def data(self, newdata):
        self.__data = newdata

    def density(self, x='variable', cat='model'):
        """Return a faceted Altair area chart of the density of column ``x``.

        Args:
            x: name of the quantitative column whose density is estimated.
            cat: name of the grouping column; one colour and one facet per
                value, laid out in two columns with independent axes.

        Returns:
            The Altair chart (not displayed by this method).
        """
        # Chart the instance's own data rather than a notebook-level global,
        # so the result always matches what was passed to the constructor.
        plot = alt.Chart(self.data).mark_area().encode(
            x=x + ':Q',
            y='density:Q',
            color=cat
        ).transform_density(
            density=x,
            as_=[x, 'density'],
            groupby=[cat],
        ).properties(width=300).facet(
            cat,
            columns=2
        ).resolve_scale(
            y='independent',
            x='independent'
        ).transform_filter(
            # Drop rows whose category is null/NaN.
            'isValid(datum.' + cat + ')'
        )
        return plot

    def map(self, column1='variable1', column2='variable2'):
        """Return a Folium choropleth map coloured by ``column2``.

        ``self.data`` is used both as the geometry source and as the value
        table (assumes it is a GeoDataFrame-like object accepted by
        ``folium.Choropleth`` — TODO confirm).

        Args:
            column1: key column; features are matched on
                ``feature.properties.<column1>``.
            column2: value column that drives the fill colour.

        Returns:
            The ``folium.Map`` with the choropleth and a hover tooltip
            showing both columns.
        """
        m = folium.Map(zoom_start=9, tiles=None)
        folium.TileLayer('CartoDB positron', control=False).add_to(m)

        choropleth = folium.Choropleth(
            geo_data=self.data,
            data=self.data,
            columns=[column1, column2],
            key_on="feature.properties." + column1,
            fill_color='YlGn',
            highlight=True).add_to(m)

        choropleth.geojson.add_child(
            folium.features.GeoJsonTooltip([column1, column2], labels=True)
        ).add_to(m)
        return m

    def line(self, x='variable1', y='variable2', z='variable3', title='', subtitle='', xt='',
             yt='', lt=''):
        """Return an Altair line chart of ``mean(y)`` per ``x``, one line per ``z``.

        A dropdown bound to the values of ``z`` highlights the selected
        series and fades the others.

        Args:
            x: column for the x axis.
            y: quantitative column that is averaged for the y axis.
            z: column that defines the series, colours and dropdown options.
            title: chart title.
            subtitle: chart subtitle.
            xt: x-axis title.
            yt: y-axis title.
            lt: legend title.

        Returns:
            The configured Altair chart (not displayed by this method).
        """
        # NOTE(review): selection_single / add_selection are the Altair 4 API and
        # are deprecated in Altair 5 — confirm the pinned Altair version.
        input_dropdown = alt.binding_select(options=self.data[z].dropna().unique(), name=z + '   ')
        selector = alt.selection_single(fields=[z], bind=input_dropdown)
        plot_title = alt.TitleParams(title, subtitle=[subtitle])

        plot = alt.Chart(self.data,
                         title=plot_title
                         ).mark_line().encode(
            x=alt.X(x, title=xt),
            y=alt.Y('mean(' + y + '):Q',
                    title=yt),
            color=alt.Color(z, legend=alt.Legend(title=lt)),
            # Selected series fully opaque, the rest faded.
            opacity=alt.condition(selector, alt.value(1.0), alt.value(0.1)),
            tooltip=[x, z, y]
        ).add_selection(
            selector
        ).transform_filter(
            # Drop rows whose series value is null/NaN.
            '(isValid(datum.' + z + '))'
        ).properties(width=550, height=550)

        plot = plot.configure(
            title=alt.TitleConfig(
                fontSize=22,
                align="center",
                color="#364547"),
            axis=alt.AxisConfig(
                labelFontSize=12,
                titleFontSize=12,
                titleColor="#364547"),
            legend=alt.LegendConfig(
                titleFontSize=14,
                labelFontSize=10,
                titleColor="#364547"),
            axisX=alt.AxisConfig(labelAngle=-90),
            font='Roboto')
        return plot

    def bar_product(self, x='variable1', y='variable2', z='variable3',
                    article='article', xt='', yt='', zt=''):
        """Display an Altair bar chart of ``x`` per ``y``, coloured by ``z``.

        A dropdown bound to the values of ``z`` highlights the selected
        group and fades the others.

        Args:
            x: quantitative column for the bar length.
            y: column for the bar categories.
            z: column that defines colours and dropdown options.
            article: product name used as the prefix of the chart title.
            xt: x-axis title.
            yt: y-axis title.
            zt: legend title.

        Returns:
            The result of ``display(plot)``; the chart is rendered as a side
            effect rather than returned.
        """
        input_dropdown = alt.binding_select(options=self.data[z].dropna().unique(), name=z + '   ')
        selector = alt.selection_single(fields=[z], bind=input_dropdown)

        plot = alt.Chart(self.data,
                         title=article + " prices in 26 countries"
                         ).mark_bar().encode(
            x=alt.X(x + ':Q', title=xt),
            y=alt.Y(y,
                    title=yt),
            color=alt.Color(z, legend=alt.Legend(title=zt)),
            # Selected group fully opaque, the rest faded.
            opacity=alt.condition(selector, alt.value(1.0), alt.value(0.1)),
            tooltip=[z, y,
                     x + ':Q']
        ).add_selection(
            selector
        ).transform_filter(
            # Drop rows whose group value is null/NaN.
            '(isValid(datum.' + z + '))'
        ).properties(width=550, height=550)

        plot = plot.configure(
            title=alt.TitleConfig(
                fontSize=22,
                align="center",
                color="#364547"),
            axis=alt.AxisConfig(
                labelFontSize=13,
                titleFontSize=13,
                titleColor="#364547"),
            legend=alt.LegendConfig(
                titleFontSize=14,
                labelFontSize=13,
                titleColor="#364547"),
            axisX=alt.AxisConfig(labelAngle=0),
            font='Roboto')

        # `display` is the IPython built-in available inside notebook kernels.
        return display(plot)

**Pricing Density by product**

In [None]:
# Density of USD prices, one facet per product model.
f = plot_data(df)
f.density(x='price_usd', cat='model')

Countries share similar prices for all products, but the chart shows that there is at least one country where people pay more for the products.

**Geospatial Chart**

In [None]:
# Choropleth keyed on country name and coloured by USD price.
f = plot_data(geo_df)
f.map(column1='country', column2='price_usd')

This geospatial chart shows all the countries present in the data. According to this, Brazil is the most expensive country.

In [None]:
# Mean USD price per country, one line per product model.
f = plot_data(df)
f.line(
    x='country', y='price_usd', z='model',
    title='Average Product Price', subtitle='By Product',
    xt='Country', yt='Price', lt='Products',
)

For all countries, the most expensive product is the 24-inch iMac², and the cheapest is the Sport Band. There is a peak in Brazil for all products, meaning that prices are highest there. The United States is the cheapest country to buy in.

In [None]:
# Split the price table into one sub-frame per product model, keyed by model
# name in order of first appearance.
lis = df.model.unique()
modelsdict = {model_name: df[df.model == model_name] for model_name in lis}

**Pricing comparison by product**

In [None]:
v = 0
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* In USA iPhone SE costs \\$399, in Brazil \\$661.

In [None]:
v = 1
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* The iPhone 12 is most expensive in Brazil and Turkey (\\$1020).
* United States and Canada are the least expensive countries (\\$600).

In [None]:
v = 2
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

In [None]:
v = 3
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* The best prices for the AirPods Pro and the AirPods (2nd generation) are in the US.

In [None]:
v = 4
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* The Apple TV is just $1 cheaper in the US than in Canada.

In [None]:
v = 5
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

In [None]:
v = 6
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* Apple Watch Series 3 and SE are cheaper in USA

In [None]:
v = 7
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* In Turkey, the Sport Band costs $10 less than in the US.

In [None]:
v = 8
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

In [None]:
v = 9
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

- For the iPad and the rest of the products, prices in Europe are close to each other. Prices in Mexico are similar to the European ones.

In [None]:
v = 10
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

- The best price for the Apple Pencil is found in Turkey.

In [None]:
v = 11
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* Thailand is the cheapest country to buy the 24-inch iMac (\\$1293.32). The cost in Brazil (\\$3145.1) is 2.42 times the price in the United States.

In [None]:
v = 12
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* MacBook Air is cheapest in Thailand.

In [None]:
v = 13
# Pick the v-th (model name, per-model frame) pair from modelsdict, in insertion order.
article_name, article_prices = list(modelsdict.items())[v]
f = plot_data(data=article_prices)
f.bar_product(
    x='price_usd', y='country', z='income_group', article=article_name,
    xt='Price in USD', yt='Country', zt='Income Group',
)

* Canada offers the best price for the Magic Mouse

# Conclusions

* Brazil is the most expensive country to buy an Apple product.
* Turkey is the cheapest country to buy the Apple Pencil and the Sport Band.
* iMac and MacBook Air are cheaper in Thailand.
* The United States is the best country to buy the iPhone, AirPods, and Apple Watch.
* Most of the European countries offer similar prices.
* The prices in Mexico are similar to Europe.
* Canada is closest to the US for most products, and offers the best price for the Magic Mouse.