### Plotly Figure Friday - 2024 week 43 - Repairs

In [2]:
import polars as pl
import plotly.express as px
# import pycountry

df = (
    pl.read_csv('federal_cty_unharm.csv', ignore_errors=True)
)

print(df)

print(1/0)


#------------------------------------------------------------------------------#
#  MAP COUNTRY ABBREVIATIONS TO FULL NAMES, USING PYCOUNTRY LIBRARY            #
#------------------------------------------------------------------------------#
df_countries = (
    pl.DataFrame(
        dict(
            zip(
                [c.name for c in pycountry.countries],
                [c.alpha_3 for c in pycountry.countries]
            )
        )
    )
    .transpose(include_header=True)
    .rename({'column': 'COUNTRY', 'column_0': 'CTRY_ABBR'})
)

#------------------------------------------------------------------------------#
#  READ DATA SET, TWEAK AND CLEAN FOR THIS EXERECISE                           #
#------------------------------------------------------------------------------#
df = (
    pl.read_csv('OpenRepair_Data_RepairCafeInt_202407.csv')
    .rename({'country': 'CTRY_ABBR'})
    .join(
        df_countries,
        on='CTRY_ABBR',
        how='left'
    )
    .with_columns(pl.col('product_age').cast(pl.UInt16))
    .with_columns(
        PRODUCT_AGE_COUNT = 
            pl.col('repair_status')
            .count()
            .over(['repair_status','product_age'])
            )
    .drop('problem')  # 66_071 unique problems out or 75252 entries, not useful
    .drop('group_identifier')  # too inconsistent, not useful    
    .drop('product_category_id')  # redundant, used named product category   
    .drop('partner_product_category')      # inconsistent data,      
    .drop('id')      # unique record id for this analysis not needed
    .drop('data_provider')  # all values are Repair Café International
    # only 1 entry for unknown, drop it
    .filter(~pl.col('repair_status').is_in(['Unknown']))              
)
# shift country name and abbr to left side of dataframe, drop first col
left_cols = ['COUNTRY', 'CTRY_ABBR']
reordered_cols = left_cols + [c for c in df.columns[1:] if c not in left_cols]
df = df[reordered_cols]

#------------------------------------------------------------------------------#
#  PREPARE DATAFRAME FOR SCATTER PLOTS                                         #
#------------------------------------------------------------------------------#
df_scatter = (
    df
    .select(pl.col('repair_status','product_age', 'PRODUCT_AGE_COUNT'))
    .unique(['repair_status', 'product_age'])
    .pivot(
         on='repair_status',
         values='PRODUCT_AGE_COUNT',
    )
    .sort('product_age', descending=False)
)

#------------------------------------------------------------------------------#
#  SCATTER PLOT REPAIR COUNT BY PRODUCT AGE, LINEAR SCALE                      #
#------------------------------------------------------------------------------#
plot_cols = ['Fixed',  'End of life', 'Repairable'] 
x_max = 40
fig = px.scatter(
    data_frame= df_scatter,
    x = 'product_age',
    y = plot_cols,
    template='simple_white',
    width=800,
    height=500,
)
fig.update_layout(
        title='Linear Scale (Y) of Repair Counts by product age'.upper(),
        xaxis_title='product age [years]'.upper(),
        yaxis_title='linear scale - Repair Count'.upper(),  
        yaxis_range = [0.0, 1400.0],
        xaxis_range=[0, x_max],
        legend_title=None,
        hovermode='x unified', 

)
fig.update_traces(
    mode='lines+markers',
    hovertemplate=' '.join(['%{y}'])
)
fig.show()

#------------------------------------------------------------------------------#
#  SCATTER PLOT REPAIR COUNT BY PRODUCT AGE, LOG SCALE                         #
#------------------------------------------------------------------------------#
fig = px.scatter(
    df_scatter,
    'product_age',
    plot_cols,
    template='simple_white',
    width=800,
    height=500,
    log_y=True,
)
fig.update_layout(
        title='Log Scale (Y) of Repair Counts by product age'.upper(),
        xaxis_title='product age [years]'.upper(),
        yaxis_title='log scale Repair Count'.upper(),  
        yaxis_range = [0.0, 3.5],
        xaxis_range=[0, x_max],
        legend_title=None,
        hovermode='x unified', 
)
fig.update_traces(
    mode='lines+markers',
    hovertemplate=' '.join(['%{y}'])
)
fig.show()


shape: (8_483, 117)
┌───────┬──────┬───────┬──────────────────┬───┬──────────┬───────────┬──────────┬──────────────────┐
│ ags   ┆ year ┆ state ┆ state_name       ┆ … ┆ cdu_csu  ┆ far_right ┆ far_left ┆ far_left_w_linke │
│ ---   ┆ ---  ┆ ---   ┆ ---              ┆   ┆ ---      ┆ ---       ┆ ---      ┆ ---              │
│ i64   ┆ i64  ┆ i64   ┆ str              ┆   ┆ f64      ┆ f64       ┆ f64      ┆ f64              │
╞═══════╪══════╪═══════╪══════════════════╪═══╪══════════╪═══════════╪══════════╪══════════════════╡
│ 1001  ┆ 1953 ┆ 1     ┆ Schleswig-Holste ┆ … ┆ 0.337441 ┆ 0.012157  ┆ 0.007815 ┆ 0.007815         │
│       ┆      ┆       ┆ in               ┆   ┆          ┆           ┆          ┆                  │
│ 1002  ┆ 1953 ┆ 1     ┆ Schleswig-Holste ┆ … ┆ 0.525035 ┆ 0.008926  ┆ 0.020774 ┆ 0.020774         │
│       ┆      ┆       ┆ in               ┆   ┆          ┆           ┆          ┆                  │
│ 1003  ┆ 1953 ┆ 1     ┆ Schleswig-Holste ┆ … ┆ 0.461617 ┆ 0.028876  ┆ 

ZeroDivisionError: division by zero

In [None]:
import pandas as pd
import json
import plotly.express as px


df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2024/week-44/federal_cty_unharm.csv")
# df = pd.read_csv(r'data/federal_cty_unharm.csv')

# <p>This idx.rows have 'inf' as values. They will be replaced by the mean of the year-corresponding-election turnout.
# Kept this as a reference: Index([5442, 5461, 5496, 5521, 5891, 5910], dtype='int64')</p>
# df[df['turnout'].isnull()].index
df.loc[5442, 'turnout'] = 0.671801
df.loc[5891, 'turnout'] = 0.714955
df.loc[5910, 'turnout'] = 0.731389
df.loc[5461, 'turnout'] = 0.672186
df.loc[5496, 'turnout'] = 0.702586
df.loc[5521, 'turnout'] = 0.694358


gr_turnout = df.groupby(by=['year', 'state_name'])[
    ['turnout']].mean().reset_index()


# This repo is where I've found the GeoJson for Germany. It's worth mentioning!
# https://github.com/isellsoap/deutschlandGeoJSON/blob/main/README.md
# https://github.com/isellsoap/deutschlandGeoJSON/blob/main/2_bundeslaender/2_hoch.geo.json

mapping_states = {
    'Bavaria': 'Bayern',
    'Hesse': 'Hessen',
    'North Rhine-Westphalia': 'Nordrhein-Westfalen',
    'Rhineland-Palatinate': 'Rheinland-Pfalz',
    'Saxony': 'Sachsen',
    'Saxony-Anhalt': 'Sachsen-Anhalt',
    'Thuringia': 'Thüringen',
}
# this replacement is neccesary to match the GeoJson
gr_turnout_replaced = gr_turnout.replace(mapping_states)

# This is my local repo: 'data/...'
with open(r"data/Germany_geo.json", mode="r", encoding="utf-8") as read_file:
    data2 = json.load(read_file)

avg_turnout = gr_turnout_replaced['turnout'].mean()

fig_map2 = px.choropleth(
    gr_turnout_replaced,
    geojson=data2,
    featureidkey='properties.name',
    color='turnout',
    range_color=(0.6, 1),
    color_continuous_scale=px.colors.diverging.BrBG,
    color_continuous_midpoint=avg_turnout,
    locations='state_name',
    scope='europe',
    labels={'turnout': 'Turnout prop'},
    animation_frame='year',
    height=600, width=600
)
fig_map2.update_geos(fitbounds='locations', visible=False)
fig_map2.show()

In [None]:
df_explore = pl.read_csv('OpenRepair_Data_RepairCafeInt_202407.csv')
#df_explore.filter(pl.col('repair_status') == 'Unknown').shape
df_explore

Unnamed: 0_level_0,id,data_provider,country,partner_product_category,product_category,product_category_id,brand,year_of_manufacture,product_age,repair_status,repair_barrier_if_end_of_life,group_identifier,event_date,problem
i64,str,str,str,str,str,i64,str,f64,f64,str,str,str,str,str
0,"""rcint_82530""","""Repair Café International""","""FRA""","""Household appliances electric …","""Iron""",40,"""Calor""",,,"""Fixed""",,"""Bourgoin-Jallieu""","""2024-07-13""","""Electrovanne HS. Ne fonctionne…"
1,"""rcint_105090""","""Repair Café International""","""USA""","""Computer equipment / phones ~ …","""Laptop""",16,"""HP""",2021.0,3.0,"""End of life""",,"""New Orleans LA""","""2024-01-31""","""Motherboard is shot. Doesnt po…"
2,"""rcint_78676""","""Repair Café International""","""NLD""","""Display and sound equipment ~ …","""TV and gaming-related accessor…",33,"""Unknown""",,,"""End of life""",,"""Olst""","""2024-07-26""","""Geen. Doet het niet meer."""
3,"""rcint_78675""","""Repair Café International""","""NLD""","""Household appliances electric …","""Fan""",7,"""Tronix""",2004.0,20.0,"""End of life""","""No way to open product""","""Olst""","""2024-07-26""","""niet gevonden. Draadbreuk op s…"
4,"""rcint_78674""","""Repair Café International""","""NLD""","""Display and sound equipment ~ …","""Portable radio""",23,"""Denver""",,,"""End of life""",,"""Olst""","""2024-07-26""","""IC kapot. Doet het niet."""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
75247,"""rcint_856""","""Repair Café International""","""NLD""","""Household appliances electric …","""Food processor""",37,"""Princess""",2008.0,9.0,"""Fixed""",,"""Capelle aan den IJssel""","""2017-05-04""","""de besturingsknop is afgebroke…"
75248,"""rcint_798""","""Repair Café International""","""NLD""","""Household appliances electric …","""Large home electrical""",17,"""Hema""",2007.0,10.0,"""Fixed""",,"""Capelle aan den IJssel""","""2017-04-20""","""kortsluiting. vocht. schoongem…"
75249,"""rcint_1302""","""Repair Café International""","""NLD""","""Computer equipment / phones ~ …","""Tablet""",30,"""Asus""",2010.0,7.0,"""Fixed""",,"""Oirsbeek""","""2017-06-03""","""Google werkt niet meer. Waarsc…"
75250,"""rcint_1297""","""Repair Café International""","""NLD""","""Household appliances electric …","""Kettle""",14,"""Tefal""",2015.0,2.0,"""Repairable""",,"""Oirsbeek""","""2017-06-03""","""water blijft koken. thermostaa…"
