# 3.0 Merging data

## 1.0 Swedish demographic data

In [72]:
import pandas as pd
import plotly_express as px
swedish_link = "https://sv.wikipedia.org/wiki/Sveriges_demografi"

swedish_DataFram = pd.read_html(swedish_link)

swedish_DataFram = swedish_DataFram[5]
swedish_DataFram = swedish_DataFram.rename({"Unnamed: 0": "År", "Folkmängd": "Befolkning"}, axis="columns")
swedish_DataFram.head()
px.line(swedish_DataFram, x="År", y="Befolkning")

In [3]:
swedish_1500_DataFrame = pd.read_html(swedish_link, header=1, skiprows=[12], match="Folkmängd")
swedish_1500_DataFrame = swedish_1500_DataFrame[0]
swedish_1500_DataFrame = swedish_1500_DataFrame.drop([7, 8, 9])
swedish_1500_DataFrame = swedish_1500_DataFrame.drop(["Totalt", "Promille"], axis=1)
swedish_1500_DataFrame = swedish_1500_DataFrame.rename({"Vid utgången av år": "År"}, axis = "columns")
swedish_1500_DataFrame
dataframes = [swedish_DataFram, swedish_1500_DataFrame]
swedish_complete_DataFrame = pd.concat(dataframes).reset_index()
swedish_complete_DataFrame = swedish_complete_DataFrame.sort_values(by="År")
px.line(swedish_complete_DataFrame, x="År", y="Folkmängd")

## 2. Denmark demogrphic data

In [34]:
denmark_link = "https://da.wikipedia.org/wiki/Danmarks_demografi"
denmark_DataFrame = pd.read_html(denmark_link, match="Befolkning")
denmark_DataFrame = denmark_DataFrame[0]
denmark_DataFrame = denmark_DataFrame.drop([0, 1]).reset_index()
denmark_DataFrame_Years = pd.DataFrame(denmark_DataFrame["År"])
denmark_1976_Forward = pd.DataFrame(denmark_DataFrame["År.1"])
denmark_1976_Forward = denmark_1976_Forward.rename({"År.1": "År"}, axis="columns")
denmark_1976_population = pd.DataFrame(denmark_DataFrame["Befolkning pr. 1. januar.1"])
denmark_1976_population = denmark_1976_population.rename({"Befolkning pr. 1. januar.1": "Befolkning pr. 1. januar"}, axis="columns")
clean_denmark_DataFrame = pd.concat([denmark_DataFrame["År"], denmark_DataFrame["Befolkning pr. 1. januar"]], axis=1)
clean_denmark_1976 = pd.concat([denmark_1976_Forward, denmark_1976_population], axis=1)
clean_denmark_DataFrame = clean_denmark_DataFrame.append(clean_denmark_1976)
clean_denmark_DataFrame = clean_denmark_DataFrame.rename({"Befolkning pr. 1. januar": "Folkmängd"}, axis="columns")

fig = px.line(clean_denmark_DataFrame, x="År", y="Folkmängd", title="Danmarks befolkning över åren")
fig.update_layout(hovermode="x")
fig.update_xaxes(showspikes=True,
                spikedash="solid")
fig.show()

## 3. Norwegain demographic

In [68]:
norway_link = "https://sv.wikipedia.org/wiki/Norges_demografi"
norway_english_link = "https://en.wikipedia.org/wiki/Demographics_of_Norway"
norway_dataframe = pd.read_html(norway_link, match="Total fertilitet")
norway_english_dataframe = pd.read_html(norway_english_link, match="Total fertility")
norway_dataframe = norway_dataframe[0]
norway_english_dataframe = norway_english_dataframe[0]
norway_english_dataframe.head()
norway_english_dataframe = norway_english_dataframe.rename({"Total fertility rates[fn 1][5][7]": "Total fertilitet"}, axis="columns")
norway_dataframe["Total fertilitet"] = norway_dataframe["Total fertilitet"].fillna(norway_english_dataframe["Total fertilitet"])
norway_dataframe["Befolkning i tusentals (x 1000)"] = norway_english_dataframe["Average population"]
norway_dataframe = norway_dataframe.rename({"Befolkning i tusentals (x 1000)" : "Befolkning", "Unnamed: 0": "År"}, axis="columns")
norway_complete_dataframe = pd.concat([norway_dataframe["År"], norway_dataframe["Befolkning"], norway_dataframe["Total fertilitet"]], axis=1)
norway_dataframe

Unnamed: 0,År,Befolkning,Födda,Döda,Naturlig förändring,Födelsetal per 1000 invånare,Dödstal per 1000 invånare,Naturlig förändring per 1000 invånare,Total fertilitet
0,1900,2231000,66 229,35 345,30 884,297,158,138,4.40
1,1901,2255000,67 303,33 821,33 482,298,150,148,4.37
2,1902,2276000,66 494,31 670,34 824,292,139,153,4.26
3,1903,2288000,65 470,33 847,31 623,286,148,138,4.16
4,1904,2298000,64 143,32 895,31 248,279,143,136,4.07
...,...,...,...,...,...,...,...,...,...
108,2008,4768000,60 497,41 712,18 785,127,87,39,196.00
109,2009,4829000,61 807,41 449,20 358,128,86,42,198.00
110,2010,4889000,61 442,41 500,19 942,126,85,41,195.00
111,2011,4953000,60 220,41 393,18 827,121,83,38,188.00


## 4. Merge Sweden-Norway

In [133]:
data_frame_tags = ["År", "Befolkning", "Total fertilitet"]
data_frame_list_index = 0
sweden_step_dataframe = pd.concat([swedish_DataFram["År"], swedish_DataFram["Befolkning"], swedish_DataFram["Total fertilitet"]], axis=1)
norway_english_dataframe = norway_english_dataframe.rename({"Unnamed: 0": "Year"}, axis="columns")
norway_step_dataframe = pd.concat([norway_english_dataframe["Year"], norway_english_dataframe["Average population"], norway_english_dataframe["Total fertilitet"]], axis=1)
sweden_step_dataframe = sweden_step_dataframe.rename({"Befolkning": "Sweden population", "Total fertilitet": "Sweden Total fertility", "År": "Year"}, axis="columns")
norway_step_dataframe = norway_step_dataframe.rename({"Average population": "Norway population", "Total fertilitet": "Norway Total fertility", "År": "Year"}, axis="columns")
norway_step_dataframe = norway_step_dataframe.drop(121)
norway_step_dataframe = norway_step_dataframe.drop("Year", axis="columns")
sweden_norway_dataframe = pd.concat([sweden_step_dataframe, norway_step_dataframe], axis=1)

Unnamed: 0,Year,Sweden population,Sweden Total fertility,Norway population,Norway Total fertility
0,1900,5 117 000,402,2231000,4.40
1,1901,5 156 000,404,2255000,4.37
2,1902,5 187 000,395,2276000,4.26
3,1903,5 210 000,382,2288000,4.16
4,1904,5 241 000,383,2298000,4.07
...,...,...,...,...,...
116,2016,9 995 000,185,5213000,1.71
117,2017,10 120 000,179,5258000,1.62
118,2018,10 230 000,175,5296000,1.56
119,2019,10 327 589,170,5328000,1.53


In [134]:
fig = px.line(sweden_norway_dataframe, x="Year", y=["Sweden population", "Norway population"])
fig.update_layout(hovermode="x")
fig.update_xaxes(
    showspikes=True,
    spikedash="solid"
)
fig.show()

ValueError: Plotly Express cannot process wide-form data with columns of different type.