# Fetching and Formatting Inflation Data from SCB
This script uses the SCB API to retrieve inflation data, specifically the Consumer Price Index (KPI), for the period from 2000 to 2023. It then formats the data, extracts the relevant information, and saves it to an Excel file named 'inflation.xlsx'. The script uses the PySCBWrapper, NumPy, and Pandas libraries.

In [1]:
# Import necessary libraries
from pyscbwrapper import SCB
import numpy as np
import pandas as pd

In [2]:
# Initialize SCB object for Swedish data ('sv' is the language code passed to
# the wrapper; the listings it returns below are in Swedish)
scb = SCB('sv')
# Show the entries available at the current (top) level; each entry carries an
# 'id' that can be passed to go_down() to step into it
scb.info()

[{'id': 'AA', 'type': 'l', 'text': 'Ämnesövergripande statistik'},
 {'id': 'AM', 'type': 'l', 'text': 'Arbetsmarknad'},
 {'id': 'BE', 'type': 'l', 'text': 'Befolkning'},
 {'id': 'BO', 'type': 'l', 'text': 'Boende, byggande och bebyggelse'},
 {'id': 'EN', 'type': 'l', 'text': 'Energi'},
 {'id': 'FM', 'type': 'l', 'text': 'Finansmarknad'},
 {'id': 'HA', 'type': 'l', 'text': 'Handel med varor och tjänster'},
 {'id': 'HE', 'type': 'l', 'text': 'Hushållens ekonomi'},
 {'id': 'HS', 'type': 'l', 'text': 'Hälso- och sjukvård'},
 {'id': 'JO', 'type': 'l', 'text': 'Jord- och skogsbruk, fiske'},
 {'id': 'KU', 'type': 'l', 'text': 'Kultur och fritid'},
 {'id': 'LE', 'type': 'l', 'text': 'Levnadsförhållanden'},
 {'id': 'ME', 'type': 'l', 'text': 'Demokrati'},
 {'id': 'MI', 'type': 'l', 'text': 'Miljö'},
 {'id': 'NR', 'type': 'l', 'text': 'Nationalräkenskaper'},
 {'id': 'NV', 'type': 'l', 'text': 'Näringsverksamhet'},
 {'id': 'OE', 'type': 'l', 'text': 'Offentlig ekonomi'},
 {'id': 'PR', 'type': 'l'

In [3]:
# Navigate to the desired data level using codes: step into the entry with
# id 'PR' from the top-level listing
scb.go_down('PR')
# List the entries available under 'PR'
scb.info()

[{'id': 'PR0502', 'type': 'l', 'text': 'Byggkostnadsindex för byggnader'},
 {'id': 'PR0501', 'type': 'l', 'text': 'Byggnadsprisindex (BPI)'},
 {'id': 'PR0101', 'type': 'l', 'text': 'Konsumentprisindex (KPI)'},
 {'id': 'PR0301',
  'type': 'l',
  'text': 'Prisindex i producent- och importled (PPI)'},
 {'id': 'PR0401', 'type': 'l', 'text': 'Köpkraftspariteter'}]

In [4]:
# Step into 'PR0101', which the previous listing labels
# 'Konsumentprisindex (KPI)' (Consumer Price Index)
scb.go_down('PR0101')
# List the sub-groups available under 'PR0101'
scb.info()

[{'id': 'PR0101A', 'type': 'l', 'text': 'Konsumentprisindex (KPI)'},
 {'id': 'PR0101H',
  'type': 'l',
  'text': 'Konsumentprisindex med konstant skatt (KPI-KS)'},
 {'id': 'PR0101G',
  'type': 'l',
  'text': 'Konsumentprisindex med fast ränta (KPIF)'},
 {'id': 'PR0101I',
  'type': 'l',
  'text': 'Konsumentprisindex med fast ränta och konstant skatt (KPIF-KS)'},
 {'id': 'PR0101J',
  'type': 'l',
  'text': 'Konsumentprisindex med fast ränta exklusive energi (KPIF-XE)'},
 {'id': 'PR0101C',
  'type': 'l',
  'text': 'Harmoniserat konsumentprisindex (HIKP)'},
 {'id': 'PR0101K',
  'type': 'l',
  'text': 'Harmoniserat konsumentprisindex med konstant skatt (HIKP-KS)'},
 {'id': 'PR0101E', 'type': 'l', 'text': 'Prisbasbelopp'},
 {'id': 'PR0101F', 'type': 'l', 'text': 'Äldre tabeller som ej uppdateras'},
 {'id': 'PR0101Z', 'type': 'l', 'text': 'Riksbankstabeller'},
 {'id': 'PR0101S', 'type': 'l', 'text': 'Ekonomiska indikatorer'},
 {'id': 'PR0101X', 'type': 'l', 'text': 'Nyckeltal'}]

In [5]:
# Step into 'PR0101A', the 'Konsumentprisindex (KPI)' sub-group from the
# previous listing
scb.go_down('PR0101A')
# List the tables in this sub-group (entries with 'type': 't')
scb.info()

[{'id': 'KPItotM',
  'type': 't',
  'text': 'Konsumentprisindex (KPI), totalt, 1980=100. Månad 1980M01 - 2023M09',
  'updated': '2023-10-13T08:00:00'},
 {'id': 'KPICOI80MN',
  'type': 't',
  'text': 'Konsumentprisindex (KPI) efter varu-/tjänstegrupp (COICOP), 1980=100. Månad 1980M01 - 2023M09',
  'updated': '2023-10-13T08:00:00'},
 {'id': 'KPI19Bas1980',
  'type': 't',
  'text': 'Konsumentprisindex (KPI) efter produktgrupp, 1980=100. Månad 1980M01 - 2023M09',
  'updated': '2023-10-13T08:00:00'},
 {'id': 'KPILevindexM',
  'type': 't',
  'text': 'Konsumentprisindex (KPI)/Levnadskostnadsindex utan direkta skatter och sociala förmåner, juli 1914=100. Månad 1914M01 - 2023M09',
  'updated': '2023-10-13T08:00:00'},
 {'id': 'KPIFastAmed',
  'type': 't',
  'text': 'Konsumentprisindex (KPI) fastställda årsmedeltal, totalt, 1980=100. År 1980 - 2022',
  'updated': '2023-01-13T08:00:00'},
 {'id': 'KPISkuggAr',
  'type': 't',
  'text': 'Konsumentprisindex (KPI) årsmedeltal totalt, skuggindextal, 198

In [6]:
# Select the table 'KPItotM': KPI total, 1980=100, monthly values
scb.go_down('KPItotM')
# At table level info() returns the table's title and its variables
# (codes, value ids and value labels) instead of a list of child entries
scb.info()

{'title': 'Konsumentprisindex (KPI), totalt, 1980=100 efter tabellinnehåll och månad',
 'variables': [{'code': 'ContentsCode',
   'text': 'tabellinnehåll',
   'values': ['000004VU', '000004VT', '000004VV', '000004VW', '000004VX'],
   'valueTexts': ['KPI, fastställda tal',
    'KPI, skuggindex',
    'Årsförändring',
    'Månadsförändring',
    'År-mån-index']},
  {'code': 'Tid',
   'text': 'månad',
   'values': ['1980M01',
    '1980M02',
    '1980M03',
    '1980M04',
    '1980M05',
    '1980M06',
    '1980M07',
    '1980M08',
    '1980M09',
    '1980M10',
    '1980M11',
    '1980M12',
    '1981M01',
    '1981M02',
    '1981M03',
    '1981M04',
    '1981M05',
    '1981M06',
    '1981M07',
    '1981M08',
    '1981M09',
    '1981M10',
    '1981M11',
    '1981M12',
    '1982M01',
    '1982M02',
    '1982M03',
    '1982M04',
    '1982M05',
    '1982M06',
    '1982M07',
    '1982M08',
    '1982M09',
    '1982M10',
    '1982M11',
    '1982M12',
    '1983M01',
    '1983M02',
    '1983M03',
    

In [7]:
# Get information about available variables at the current level: a dict
# mapping each variable's label (e.g. 'tabellinnehåll', 'månad') to the list
# of value labels that can be selected for it
scb.get_variables()

{'tabellinnehåll': ['KPI, fastställda tal',
  'KPI, skuggindex',
  'Årsförändring',
  'Månadsförändring',
  'År-mån-index'],
 'månad': ['1980M01',
  '1980M02',
  '1980M03',
  '1980M04',
  '1980M05',
  '1980M06',
  '1980M07',
  '1980M08',
  '1980M09',
  '1980M10',
  '1980M11',
  '1980M12',
  '1981M01',
  '1981M02',
  '1981M03',
  '1981M04',
  '1981M05',
  '1981M06',
  '1981M07',
  '1981M08',
  '1981M09',
  '1981M10',
  '1981M11',
  '1981M12',
  '1982M01',
  '1982M02',
  '1982M03',
  '1982M04',
  '1982M05',
  '1982M06',
  '1982M07',
  '1982M08',
  '1982M09',
  '1982M10',
  '1982M11',
  '1982M12',
  '1983M01',
  '1983M02',
  '1983M03',
  '1983M04',
  '1983M05',
  '1983M06',
  '1983M07',
  '1983M08',
  '1983M09',
  '1983M10',
  '1983M11',
  '1983M12',
  '1984M01',
  '1984M02',
  '1984M03',
  '1984M04',
  '1984M05',
  '1984M06',
  '1984M07',
  '1984M08',
  '1984M09',
  '1984M10',
  '1984M11',
  '1984M12',
  '1985M01',
  '1985M02',
  '1985M03',
  '1985M04',
  '1985M05',
  '1985M06',
  '1985M

In [8]:
# Period to request, given as calendar years (both ends inclusive)
start_year = 2000
end_year = 2023

# Two-digit month codes, January ("01") through December ("12")
months = [f"{number:02d}" for number in range(1, 13)]

# SCB labels monthly periods as '<year>M<month>', e.g. '2000M01'; build one
# label for every month of every year in the requested period, in
# chronological order
formatted_dates = [
    f"{year}M{month}"
    for year in range(start_year, end_year + 1)
    for month in months
]

# Show the generated period labels
print(formatted_dates)


['2000M01', '2000M02', '2000M03', '2000M04', '2000M05', '2000M06', '2000M07', '2000M08', '2000M09', '2000M10', '2000M11', '2000M12', '2001M01', '2001M02', '2001M03', '2001M04', '2001M05', '2001M06', '2001M07', '2001M08', '2001M09', '2001M10', '2001M11', '2001M12', '2002M01', '2002M02', '2002M03', '2002M04', '2002M05', '2002M06', '2002M07', '2002M08', '2002M09', '2002M10', '2002M11', '2002M12', '2003M01', '2003M02', '2003M03', '2003M04', '2003M05', '2003M06', '2003M07', '2003M08', '2003M09', '2003M10', '2003M11', '2003M12', '2004M01', '2004M02', '2004M03', '2004M04', '2004M05', '2004M06', '2004M07', '2004M08', '2004M09', '2004M10', '2004M11', '2004M12', '2005M01', '2005M02', '2005M03', '2005M04', '2005M05', '2005M06', '2005M07', '2005M08', '2005M09', '2005M10', '2005M11', '2005M12', '2006M01', '2006M02', '2006M03', '2006M04', '2006M05', '2006M06', '2006M07', '2006M08', '2006M09', '2006M10', '2006M11', '2006M12', '2007M01', '2007M02', '2007M03', '2007M04', '2007M05', '2007M06', '2007M07'

In [9]:
# Set the query parameters for the SCB API using the formatted dates.
# The keyword name is the variable label reported by get_variables() ('månad').
# No selection is made for 'tabellinnehåll'; the rows shown further down carry
# five values each, one per table content.
# NOTE(review): formatted_dates runs through 2023M12, while the table listing
# above ends at 2023M09 - confirm how the wrapper treats labels for months
# that are not available.
scb.set_query(månad=formatted_dates)

In [10]:
# Retrieve data from the SCB API for the query set above; the response is a
# mapping whose 'data' entry is read in the next step
scb_data = scb.get_data()

In [11]:
# Extract the 'data' key from the response: the records, each with a 'key'
# (the period) and 'values' (the figures for that period)
scb_uttag = scb_data['data']

In [12]:
# Create a DataFrame from the extracted data: one row per record, with the
# list-valued columns 'key' and 'values'
df = pd.DataFrame(scb_uttag)
# Preview the first rows
df.head()

Unnamed: 0,key,values
0,[2000M01],"[257.50, 257.82, 0.4, -0.8, ..]"
1,[2000M02],"[258.70, 259.04, 0.9, 0.5, ..]"
2,[2000M03],"[259.90, 260.28, 1.0, 0.5, ..]"
3,[2000M04],"[260.00, 260.05, 0.7, -0.1, ..]"
4,[2000M05],"[261.30, 261.29, 1.0, 0.5, ..]"


In [13]:
# Build a new DataFrame from the 'key' column: every entry is a one-element
# list holding the period label (e.g. ['2000M01']), so expanding the lists
# yields a single column, named 'Dates'
split_df = pd.DataFrame(df['key'].tolist(), columns=['Dates'])
# Preview the first rows
split_df.head()

Unnamed: 0,Dates
0,2000M01
1,2000M02
2,2000M03
3,2000M04
4,2000M05


In [14]:
# Expand each row's 'values' list into separate columns, one per table content
# (the 'key' column with the period labels is handled separately).
# NOTE(review): 'faställda' differs from the 'fastställda' spelling in SCB's
# own label; it is kept unchanged here because it ends up as a column header
# in the exported Excel file.
value_columns = [
    'KPI, faställda tal',
    'KPI, skuggindex',
    'Årsförändring',
    'Månadsförändring',
    'År-mån-index',
]
split_df2 = pd.DataFrame(df['values'].tolist(), columns=value_columns)
# Preview the first rows
split_df2.head()

Unnamed: 0,"KPI, faställda tal","KPI, skuggindex",Årsförändring,Månadsförändring,År-mån-index
0,257.5,257.82,0.4,-0.8,..
1,258.7,259.04,0.9,0.5,..
2,259.9,260.28,1.0,0.5,..
3,260.0,260.05,0.7,-0.1,..
4,261.3,261.29,1.0,0.5,..


In [15]:
# Concatenate split_df (the dates) and split_df2 (the values) side by side;
# axis=1 joins them column-wise on their shared row index. The result replaces
# the original df, so the list-valued 'key' and 'values' columns are dropped.
df = pd.concat([split_df, split_df2], axis=1)
# Preview the first rows
df.head()

Unnamed: 0,Dates,"KPI, faställda tal","KPI, skuggindex",Årsförändring,Månadsförändring,År-mån-index
0,2000M01,257.5,257.82,0.4,-0.8,..
1,2000M02,258.7,259.04,0.9,0.5,..
2,2000M03,259.9,260.28,1.0,0.5,..
3,2000M04,260.0,260.05,0.7,-0.1,..
4,2000M05,261.3,261.29,1.0,0.5,..


In [16]:
# Convert the 'Dates' column from SCB period labels to datetime values.
# The labels look like '2000M01' (four-digit year, a literal 'M', two-digit
# month), so they can be parsed directly with an explicit format. The format
# carries no day, so every value lands on the first day of its month - the
# same result as appending '01' by hand. This also removes the regex detour,
# whose non-raw '\d' escapes trigger an invalid-escape-sequence warning on
# recent Python versions.
df['Dates'] = pd.to_datetime(df['Dates'], format='%YM%m')

# Preview the first rows
df.head()


Unnamed: 0,Dates,"KPI, faställda tal","KPI, skuggindex",Årsförändring,Månadsförändring,År-mån-index
0,2000-01-01,257.5,257.82,0.4,-0.8,..
1,2000-02-01,258.7,259.04,0.9,0.5,..
2,2000-03-01,259.9,260.28,1.0,0.5,..
3,2000-04-01,260.0,260.05,0.7,-0.1,..
4,2000-05-01,261.3,261.29,1.0,0.5,..


In [17]:
# Promote the 'Dates' column to the row index
df = df.set_index('Dates')
# Preview the first rows
df.head()

Unnamed: 0_level_0,"KPI, faställda tal","KPI, skuggindex",Årsförändring,Månadsförändring,År-mån-index
Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-01,257.5,257.82,0.4,-0.8,..
2000-02-01,258.7,259.04,0.9,0.5,..
2000-03-01,259.9,260.28,1.0,0.5,..
2000-04-01,260.0,260.05,0.7,-0.1,..
2000-05-01,261.3,261.29,1.0,0.5,..


In [18]:
# Save the DataFrame to an Excel file named 'inflation.xlsx' in the current
# working directory; index=True writes the 'Dates' index as a column as well
df.to_excel('inflation.xlsx', index=True)
