# What factors influence the technological innovation of countries?

Import libraries.

In [1]:
import polars as pl

Load the processed data.

In [None]:
# Read the cleaned dataset from disk into a polars DataFrame
data: pl.DataFrame = pl.read_csv(source="../data/cleaned/data.csv")

# Preview the first five rows
data.head(n=5)

(195, 30)


Country,Density(P/Km2),Abbreviation,Agricultural Land(%),Land Area(Km2),Armed Forces size,Birth Rate,Co2-Emissions,CPI,CPI Change (%),Currency-Code,Fertility Rate,Forested Area (%),Gasoline Price,GDP,Gross primary education enrollment (%),Gross tertiary education enrollment (%),Infant mortality,Largest city,Life expectancy,Maternal mortality ratio,Minimum wage,Out of pocket health expenditure,Physicians per thousand,Population,Population: Labor force participation (%),Tax revenue (%),Total tax rate,Unemployment rate,Urban population
str,i64,str,f64,i64,i64,f64,i64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,str,f64,i64,f64,f64,f64,i64,f64,f64,f64,f64,i64
"""Afghanistan""",60,"""AF""",58.1,652230,323000.0,32.49,8672,149.9,2.3,"""AFN""",4.47,2.1,0.7,19101000000.0,104.0,9.7,47.9,"""Kabul""",64.5,638.0,0.43,78.4,0.28,38041754,48.9,9.3,71.4,11.12,9797273
"""Albania""",105,"""AL""",43.1,28748,9000.0,11.78,4536,119.05,1.4,"""ALL""",1.62,28.1,1.36,15278000000.0,107.0,55.0,7.8,"""Tirana""",78.5,15.0,1.12,56.9,1.2,2854191,55.7,18.6,36.6,12.33,1747593
"""Algeria""",18,"""DZ""",17.4,2381741,317000.0,24.28,150006,151.36,2.0,"""DZD""",3.02,0.8,0.28,169990000000.0,109.9,51.4,20.1,"""Algiers""",76.7,112.0,0.95,28.1,1.72,43053054,41.2,37.2,66.1,11.7,31510100
"""Andorra""",164,"""AD""",40.0,468,,7.2,469,,,"""EUR""",1.27,34.0,1.51,3154100000.0,106.4,,2.7,"""Andorra la Vella""",,,6.63,36.4,3.33,77142,,,,,67873
"""Angola""",26,"""AO""",47.5,1246700,117000.0,40.73,34693,261.73,17.1,"""AOA""",5.52,46.3,0.97,94635000000.0,113.5,9.3,51.6,"""Luanda""",60.8,241.0,0.71,33.4,0.21,31825295,77.5,9.2,49.1,6.89,21061025


Load technology innovation data:

- Scientific and technical journal articles.
- High-technology exports (current US$).
- High-technology exports (% of manufactured exports).

In [20]:
def _load_indicator(csv_path: str, indicator_name: str) -> pl.DataFrame:
    """Read one indicator CSV and keep only the country and its 2022 value.

    The "Country Name" column is renamed to "Country" and the
    "2022 [YR2022]" column is renamed to ``indicator_name``.
    """
    return (
        pl.read_csv(csv_path)
        .select(["Country Name", "2022 [YR2022]"])
        .rename({"Country Name": "Country", "2022 [YR2022]": indicator_name})
    )


# One two-column frame (Country, indicator) per source file
journal_articles: pl.DataFrame = _load_indicator(
    "../data/raw/technological_innovation/journal_articles.csv",
    "Tech journal articles",
)
exports_values: pl.DataFrame = _load_indicator(
    "../data/raw/technological_innovation/exports_values.csv",
    "High-technology exports ($)",
)
exports_percentages: pl.DataFrame = _load_indicator(
    "../data/raw/technological_innovation/exports_percentages.csv",
    "High-technology exports (%)",
)

# Combine the three indicators on the country name (polars joins are inner by default)
technology_innovation = journal_articles.join(exports_values, on="Country").join(
    exports_percentages, on="Country"
)

technology_innovation = (
    technology_innovation
    # ".." is the placeholder used for missing values; turn it into null
    .select(pl.all().replace("..", None))
    # Parse the string columns as numbers; float columns are rounded to 2 decimals
    .with_columns(
        pl.col("Tech journal articles").cast(pl.Float64).round(2),
        pl.col("High-technology exports ($)").cast(pl.Int64()),
        pl.col("High-technology exports (%)").cast(pl.Float64).round(2),
    )
)

# Preview the first five rows
technology_innovation.head()

Country,Tech journal articles,High-technology exports ($),High-technology exports (%)
str,f64,i64,f64
"""Afghanistan""",169.19,,
"""Albania""",238.59,886411.0,0.06
"""Algeria""",7606.65,,
"""American Samoa""",,,
"""Andorra""",9.6,49533520.0,13.31


Join the original data with the technology innovation data.

In [27]:
# Columns contributed by the technology innovation frame (everything except the join key)
innovation_columns = [
    name for name in technology_innovation.columns if name != "Country"
]

# Make the cell safe to re-run: `data` is reassigned below, so a second execution
# would otherwise join the same indicator columns onto a frame that already has
# them. Remove any copies from a previous run before joining again.
data = data.select([name for name in data.columns if name not in innovation_columns])

# Show sizes
print("Data size:", data.shape)
print("Technology innovation size:", technology_innovation.shape)

# Left join the dataframes (every row of `data` is kept, matched or not)
data = data.join(technology_innovation, on="Country", how="left")

# Show size
print("Data size:", data.shape)

# Keep only the rows whose "Tech journal articles" value is missing
null = data.filter(pl.col("Tech journal articles").is_null())

# Write the rows with missing values to a csv file
null.write_csv("../data/null_values.csv")

Data size: (195, 45)
Technology innovation size: (219, 4)
Data size: (195, 48)
