Data from https://openrepair.org/open-data/downloads/

(Source: https://openrepair.org/news/open-repair-alliance-dataset-increases-to-81000-records-of-community-repair/)

In [31]:
import polars as pl
from kedro.config import OmegaConfigLoader
from kedro.io import DataCatalog


# See https://github.com/kedro-org/kedro/issues/2583
conf_loader = OmegaConfigLoader("conf", config_patterns={"catalog": ["catalog.yml", "**/catalog.yml"]})

conf_catalog = conf_loader.get("catalog.yml")
catalog = DataCatalog.from_config(conf_catalog)

pl.Config.set_fmt_str_lengths(100)

polars.config.Config

In [32]:
df = catalog.load("openrepair-0_3-events-raw")
df.head()

DatasetNotFoundError: Dataset 'openrepair-0_3-events-raw' not found in the catalog

In [3]:
len(df)

81189

In [4]:
categories = pl.read_csv("data/OpenRepairData_v0.3_Product_Categories.csv")
categories.head()

product_category_id,product_category
i64,str
1,"""Aircon/dehumidifier"""
2,"""Battery/charger/adapter"""
3,"""Decorative or safety lights"""
4,"""Desktop computer"""
5,"""Digital compact camera"""


In [5]:
df = df.select(pl.all().exclude("product_category")).join(
    categories, on="product_category_id"
)
df.head()

id,data_provider,country,partner_product_category,product_category_id,brand,year_of_manufacture,product_age,repair_status,repair_barrier_if_end_of_life,group_identifier,event_date,problem,product_category
str,str,str,str,i64,str,i64,f64,str,str,str,date,str,str
"""anstiftung_2749""","""anstiftung""","""DEU""","""Elektro divers ~ Nähmaschine""",27,"""Unknown""",,,"""Repairable""",,"""5073""",2012-06-20,"""Funktionierte nicht mehr. Fehler und Lösung nicht dokumentiert.""","""Sewing machine"""
"""anstiftung_2750""","""anstiftung""","""DEU""","""Computer ~ Laptop""",16,"""Unknown""",,,"""Repairable""",,"""5073""",2012-06-20,"""Wurde schnell heiß. Der Lüfter war sehr verschmutzt, wurde gereinigt. Das nächste Mal müssen wir no…","""Laptop"""
"""anstiftung_2746""","""anstiftung""","""DEU""","""Computer ~ Drucker""",25,"""Unknown""",,,"""Fixed""",,"""5073""",2012-06-20,"""Funktionierte nicht mehr. Fehler und Lösung nicht dokumentiert.""","""Printer/scanner"""
"""anstiftung_2747""","""anstiftung""","""DEU""","""Unterhaltungselektronik ~ Kopfhörer""",11,"""Unknown""",,,"""Fixed""",,"""5073""",2012-06-20,"""Funktionierte nicht mehr. Fehler und Lösung nicht dokumentiert.""","""Headphones"""
"""anstiftung_2742""","""anstiftung""","""DEU""","""Haushaltsgeräte ~ Spielzeug""",32,"""Unknown""",,,"""Fixed""",,"""5073""",2012-09-19,"""Die Beine der Puppe waren ab. Sie waren mit Fäden an der Hüfte befestigt, damit sie sich auf drehen…","""Toy"""


In [6]:
len(df)

81189

In [7]:
df["country"].value_counts().sort("counts", descending=True).head()

country,counts
str,u32
"""GBR""",23141
"""NLD""",22986
"""DEU""",10532
"""BEL""",7854
"""DNK""",5150


In [8]:
df["product_category"].value_counts().sort(by="counts", descending=True).head(10)

product_category,counts
str,u32
"""Lamp""",5524
"""Vacuum""",5437
"""Laptop""",5092
"""Hi-Fi separates""",5015
"""Small kitchen item""",4682
"""Coffee maker""",4638
"""Power tool""",4038
"""Portable radio""",3968
"""Food processor""",3363
"""Small home electrical""",2766


Minor correction needed for repair barrier: one part of [the standard](https://standard.openrepair.org/standard.html) specifies `"Item too worn out"`, the other says `"Product too worn out"`, but they are the same.

In [9]:
df["repair_barrier_if_end_of_life"].value_counts()

repair_barrier_if_end_of_life,counts
str,u32
"""No way to open the product""",783
"""Lack of equipment""",301
"""Repair information not available""",511
"""Spare parts too expensive""",700
,76452
"""Spare parts not available""",1695
"""Item too worn out""",515
"""Product too worn out""",232


In [10]:
df = df.with_columns(
    pl.col("repair_barrier_if_end_of_life").map_dict(
        {"Item too worn out": "Product too worn out"},
        default=pl.col("repair_barrier_if_end_of_life"),
    )
)
df["repair_barrier_if_end_of_life"].value_counts()

repair_barrier_if_end_of_life,counts
str,u32
"""Spare parts too expensive""",700
"""Product too worn out""",747
"""Repair information not available""",511
"""Spare parts not available""",1695
"""Lack of equipment""",301
"""No way to open the product""",783
,76452
