Skip to content

Latest commit

 

History

History
311 lines (255 loc) · 6.3 KB

household_expense_statistics.livemd

File metadata and controls

311 lines (255 loc) · 6.3 KB

家計調査データ取得

Mix.install(
  [
    {:explorer, "~> 0.5"},
    {:codepagex, "~> 0.1"},
    {:kino, "~> 0.8"},
    {:kino_vega_lite, "~> 0.1"},
    {:req, "~> 0.3"}
  ],
  config: [
    codepagex: [
      encodings: ["VENDORS/MICSFT/WINDOWS/CP932"]
    ]
  ]
)

出典

総務省統計局ホームページ

家計調査(家計収支編) 時系列データ(二人以上の世帯)

https://www.stat.go.jp/data/kakei/longtime/index.html#time

  • 月 全品目(2015年改定)
  • 月 全品目(2020年改定)

(2023年2月20日に利用)

準備

alias Explorer.DataFrame
alias Explorer.Series
require Explorer.DataFrame
shift_jis = "VENDORS/MICSFT/WINDOWS/CP932"

データ取得

string_value =
  "https://www.stat.go.jp/data/kakei/longtime/csv/h-mon-2020.csv"
  |> Req.get!()
  |> then(&Codepagex.to_string!(&1.body, shift_jis))

整形

[header_row | data_rows] =
  string_value
  |> String.split("\r\n")
  |> Enum.slice(3..-1)
  |> Enum.map(&String.split(&1, ","))
fixed_cols = Enum.slice(header_row, 0..7)
month_cols =
  header_row
  |> Enum.slice(8..-1)
  |> Enum.map(fn month ->
    month
    |> String.trim()
    |> String.replace("月", "")
    |> then(&String.slice("0" <> &1, -2..-1))
  end)
  |> Enum.chunk_every(12)
  |> Enum.zip([2020, 2021, 2022])
  |> Enum.flat_map(fn {month_list, year} ->
    Enum.map(month_list, fn month -> "#{year}-#{month}-01" end)
  end)
  |> dbg()
header_row = fixed_cols ++ month_cols
df_2020 =
  [header_row | data_rows]
  |> Enum.map(&Enum.join(&1, ","))
  |> Enum.join("\n")
  |> DataFrame.load_csv!()

Kino.DataTable.new(df_2020)
df_2015 =
  "https://www.stat.go.jp/data/kakei/longtime/csv/h-mon-2015.csv"
  |> Req.get!()
  |> then(&Codepagex.to_string!(&1.body, shift_jis))
  |> String.split("\r\n")
  |> Enum.slice(3..-1)
  |> Enum.map(&String.split(&1, ","))
  |> then(fn [header_row | data_rows] ->
    fixed_cols = Enum.slice(header_row, 0..7)

    month_cols =
      header_row
      |> Enum.slice(8..-1)
      |> Enum.map(fn month ->
        month
        |> String.trim()
        |> String.replace("月", "")
        |> then(&String.slice("0" <> &1, -2..-1))
      end)
      |> Enum.chunk_every(12)
      |> Enum.zip(2015..2019)
      |> Enum.flat_map(fn {month_list, year} ->
        Enum.map(month_list, fn month -> "#{year}-#{month}-01" end)
      end)

    header_row = fixed_cols ++ month_cols

    [header_row | data_rows]
    |> Enum.map(&Enum.join(&1, ","))
    |> Enum.join("\n")
  end)
  |> DataFrame.load_csv!()

Kino.DataTable.new(df_2015)
df_all =
  DataFrame.join(
    DataFrame.select(df_2015, &(&1 != "表側連番")),
    DataFrame.select(df_2020, &(&1 != "表側連番")),
    how: :outer
  )

Kino.DataTable.new(df_all)
pivot_df =
  DataFrame.pivot_longer(
    df_all,
    &String.starts_with?(&1, "20"),
    names_to: "年月",
    values_to: "支出金額"
  )

Kino.DataTable.new(pivot_df)
date_list = Series.to_list(pivot_df["年月"])

month_series =
  date_list
  |> Enum.map(&(&1 |> String.split("-") |> Enum.at(1) |> String.to_integer()))
  |> Series.from_list()

year_series =
  date_list
  |> Enum.map(&(&1 |> String.split("-") |> Enum.at(0) |> String.to_integer()))
  |> Series.from_list()

household_df =
  pivot_df
  |> DataFrame.put("月", month_series)
  |> DataFrame.put("年", year_series)

Kino.DataTable.new(household_df)

ローカルへのダウンロード

Kino.Download.new(
  fn -> DataFrame.dump_csv!(household_df) end,
  filename: "家計支出統計_品目年月別.csv"
)

集約項目一覧

household_df
|> DataFrame.filter(大分類 == "-")
|> DataFrame.distinct(["品目分類"])
|> Kino.DataTable.new()

大分類一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 == "-" and
    小分類 == "-" and
    中間計 == "-" and
    符号 == "-"
)
|> DataFrame.distinct(["大分類", "品目分類"])
|> Kino.DataTable.new()

中分類一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 != "-" and
    小分類 == "-" and
    中間計 == "-" and
    符号 == "-"
)
|> DataFrame.distinct(["大分類", "中分類", "品目分類"])
|> Kino.DataTable.new()

小分類一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 != "-" and
    小分類 != "-" and
    中間計 == "-" and
    符号 == "-"
)
|> DataFrame.distinct(["大分類", "中分類", "小分類", "品目分類"])
|> Kino.DataTable.new()

中間計一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 != "-" and
    小分類 != "-" and
    中間計 != "-" and
    符号 == "-"
)
|> DataFrame.distinct(["中間計", "品目分類"])
|> Kino.DataTable.new()

符号一覧

household_df
|> DataFrame.filter(符号 != "-")
|> DataFrame.distinct(["符号", "品目分類"])
|> Kino.DataTable.new()

ケーキの支出推移

cake_df = DataFrame.filter(household_df, 符号 == "344")

Kino.DataTable.new(cake_df)
month = Series.to_list(cake_df["年月"])
expenses = Series.to_list(cake_df["支出金額"])

VegaLite.new(width: 700, title: "ケーキ支出金額推移")
|> VegaLite.data_from_values(x: month, y: expenses)
|> VegaLite.mark(:line, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :temporal, title: "年月")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "支出金額")
latest_cake_df = DataFrame.filter(cake_df,== 2022)

month = Series.to_list(latest_cake_df["月"])
expenses = Series.to_list(latest_cake_df["支出金額"])

VegaLite.new(width: 700, title: "ケーキ支出金額推移")
|> VegaLite.data_from_values(x: month, y: expenses)
|> VegaLite.mark(:line, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :ordinal, title: "年月")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "支出金額")
latest_cake_df
|> DataFrame.group_by("品目分類")
|> DataFrame.mutate(
  最小: min(支出金額),
  最大: max(支出金額),
  最小最大比: max(支出金額) / min(支出金額)
)
|> DataFrame.select(["最小", "最大", "最小最大比"])
|> DataFrame.to_rows()
|> List.first()