In [1]:
import pandas as pd
import numpy as np

Source data for omx30 from url:
http://www.nasdaqomxnordic.com/index/historiska_kurser?Instrument=SE0000337842

You can specify date-range and download csv-file

In [15]:
# I've downloaded csv-file and named it omx30_source and uploaded it to same folder
# read csv_file with specifications sep; and skiprows=1
# save info in df named omx30_raw
omx30_raw = pd.read_csv("omx30_source.csv",sep=";", skiprows=1)

# Cleaning csv

In [17]:
# Inspecting df
# I'm only interested in column "Datum" and "Stängn.kurs" so I'm getting rid of the other columns
omx30_raw.head()

Unnamed: 0,Datum,Högstakurs,Lägstakurs,Stängn.kurs,Genomsnittspris,Tot.vol.,Oms,Unnamed: 7
0,2020-09-11,181200,179221,180146,,1,,
1,2020-09-10,181376,179243,180285,,1,,
2,2020-09-09,181282,177938,181258,,1,,
3,2020-09-08,180125,176843,178260,,1,,
4,2020-09-07,180091,177394,179926,,1,,


In [19]:
# Keeping only columns of interest
omx30_raw = omx30_raw[["Datum", "Stängn.kurs"]]

In [20]:
# Inspecting df
omx30_raw.head()

Unnamed: 0,Datum,Stängn.kurs
0,2020-09-11,180146
1,2020-09-10,180285
2,2020-09-09,181258
3,2020-09-08,178260
4,2020-09-07,179926


In [22]:
# renaming columns
omx30_raw.rename(columns = {"Datum":"datum", "Stängn.kurs":"SK"}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [29]:
# Replacing , for numerical decimals to . 
omx30_raw.SK = omx30_raw.SK.str.replace(",",".")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [32]:
# changing dtype for datum to datetime
omx30_raw.datum = pd.to_datetime(omx30_raw.datum)

In [37]:
# changing dtype for SK to float
omx30_raw.SK = omx30_raw.SK.astype(float)

In [38]:
# Inspecting df-info
omx30_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1930 entries, 0 to 1929
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   datum   1930 non-null   datetime64[ns]
 1   SK      1930 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 30.3 KB


# add columns for year and month

In [41]:
# Add a column for year
omx30_raw["år"] = omx30_raw["datum"].dt.year

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  omx30_raw["år"] = omx30_raw["datum"].dt.year


In [40]:
# add column for month 
omx30_raw["månad"] = omx30_raw["datum"].dt.month

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  omx30_raw["månad"] = omx30_raw["datum"].dt.month


In [42]:
# Inspecting df
omx30_raw.head()

Unnamed: 0,datum,SK,månad,år
0,2020-09-11,1801.46,9,2020
1,2020-09-10,1802.85,9,2020
2,2020-09-09,1812.58,9,2020
3,2020-09-08,1782.6,9,2020
4,2020-09-07,1799.26,9,2020


In [49]:
# add column with year and month
omx30_raw["år_månad"] = pd.to_datetime(omx30_raw.år.astype(str) + "-" + omx30_raw.månad.astype(str))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  omx30_raw["år_månad"] = pd.to_datetime(omx30_raw.år.astype(str) + "-" + omx30_raw.månad.astype(str))


In [50]:
# Inspecting df
omx30_raw.head()

Unnamed: 0,datum,SK,månad,år,år_månad
0,2020-09-11,1801.46,9,2020,2020-09-01
1,2020-09-10,1802.85,9,2020,2020-09-01
2,2020-09-09,1812.58,9,2020,2020-09-01
3,2020-09-08,1782.6,9,2020,2020-09-01
4,2020-09-07,1799.26,9,2020,2020-09-01


# Groupby: year and month. Calc: avg. SK

In [54]:
# new df with avg. SK grouped by year and month
# so every row has the average SK for a specific month
# the table has every monthly average SK for the period of interest
omx30_yrmo_avg = omx30_raw.groupby("år_månad")[["SK"]].mean()

In [61]:
# Inspecting df
omx30_yrmo_avg

Unnamed: 0_level_0,SK,SK_n
år_månad,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,1140.678182,1.000000
2013-02-01,1183.766500,1.037774
2013-03-01,1205.377500,1.056720
2013-04-01,1178.400000,1.033070
2013-05-01,1225.900476,1.074712
...,...,...
2020-05-01,1559.440526,1.367117
2020-06-01,1669.241429,1.463376
2020-07-01,1738.922174,1.524463
2020-08-01,1766.410000,1.548561


In [56]:
# I want to normalize the values 
# I'm achieving this through dividing each value with the starting value
def omx30_SK_n(x):
    return x / 1140.678182

In [58]:
# adding column with normalized values
omx30_yrmo_avg["SK_n"] = omx30_yrmo_avg.SK.apply(omx30_SK_n)

In [59]:
# Inspecting df
# Looks got, ready to write to csv
omx30_yrmo_avg

Unnamed: 0_level_0,SK,SK_n
år_månad,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,1140.678182,1.000000
2013-02-01,1183.766500,1.037774
2013-03-01,1205.377500,1.056720
2013-04-01,1178.400000,1.033070
2013-05-01,1225.900476,1.074712
...,...,...
2020-05-01,1559.440526,1.367117
2020-06-01,1669.241429,1.463376
2020-07-01,1738.922174,1.524463
2020-08-01,1766.410000,1.548561


# Writing final df to csv

In [62]:
omx30_yrmo_avg.to_csv("omx30_v1.csv")