# Country and Reference Data Preparation (sex, diets, skin types, blood types)

In [278]:
# Install necessary libraries
!pip3 install -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [279]:
from countryinfo import CountryInfo
from hdx.location.country import Country
import numpy as np
import os
import pandas as pd
from phone_iso3166.country import *
import phonenumbers
import random
import sys
import uuid

In [280]:
# utility function to generate UUID
def generate_uuid(index, seed):
    """Return a reproducible version-4 UUID for a row position.

    A private random generator is seeded with ``seed + index``, so the same
    (index, seed) pair always yields the same UUID and the global ``random``
    state is never touched.  Only the sum matters: ``(index=1, seed=10)`` and
    ``(index=0, seed=11)`` produce the same UUID, so seeds used for different
    tables must be spaced further apart than the tables are long.

    Args:
        index: Row position within the table.
        seed: Base seed identifying the table.

    Returns:
        uuid.UUID: UUID built from 128 pseudo-random bits, with the version
        field forced to 4 by the ``uuid.UUID`` constructor.
    """
    rng = random.Random(seed + index)
    random_bits = rng.getrandbits(128)
    return uuid.UUID(int=random_bits, version=4)

In [281]:
# Directory holding the country flag icons. The country cell lists this directory and
# derives each ISO2 code from the icon file name. The path is relative, so it is
# resolved against the notebook's current working directory.
country_icons_path = "../Data/media/icons/country_flags"

In [282]:
# Sex lookup table: one row per sex, carrying a numeric code and a label.
sex_data = {
    "code": [1, 2],
    "sex": ["male", "female"],
}
sex_df = pd.DataFrame(data=sex_data)

# Reproducible primary keys derived from the row position and this table's seed (2024),
# placed as the first column.
sex_ids = [generate_uuid(row, 2024) for row in range(len(sex_df))]
sex_df.insert(0, "id", sex_ids)
sex_df

Unnamed: 0,id,code,sex
0,940eee3c-ba6f-475c-ae84-496e7857dd86,1,male
1,d66887a3-a556-4782-952b-f8818ec8d8bc,2,female


In [283]:
# Country data
#
# Each flag icon file is named after an ISO2 country code (e.g. "af.svg"), so the icon
# directory listing is used as the list of candidate countries.
#
# os.listdir() returns entries in arbitrary, filesystem-dependent order. The UUIDs below
# are seeded by row position, so the listing is sorted first to make the ids reproducible
# across machines and runs.
# NOTE(review): sorting changes which UUID each country receives compared with CSVs that
# were exported from an unsorted listing - regenerate anything referencing the old ids.
country_flag_icons = sorted(os.listdir(country_icons_path))
print(f"Number of listed countries: {len(country_flag_icons)}")

# Column-oriented accumulator; the key order defines the DataFrame column order.
# NOTE: calling codes are not collected (the earlier CountryInfo-based attempt was disabled).
country_dict = {
    "country": [],
    "iso2": [],
    "iso3": [],
    "flag_icon_path": [],
}

for icon_path in country_flag_icons:
    # File name without extension, upper-cased, is the ISO2 code to look up.
    country_iso2 = icon_path.split(".")[0].upper()
    country_by_iso2 = Country.get_country_info_from_iso2(country_iso2)
    if not country_by_iso2:
        # Icon names that the lookup does not resolve to a country are skipped,
        # which is why the final table can have fewer rows than listed icons.
        continue
    country_dict["country"].append(country_by_iso2["#country+name+preferred"])
    country_dict["iso2"].append(country_iso2)
    country_dict["iso3"].append(country_by_iso2["#country+code+v_iso3"])
    country_dict["flag_icon_path"].append(f"assets/icons/country_flags/{icon_path}")

countries_df = pd.DataFrame(data=country_dict)
# Ids are assigned in (sorted) icon order, before the rows are re-ordered by country name.
countries_df.insert(column="id", loc=0, value=[generate_uuid(i, 3770) for i in range(countries_df.shape[0])])
# Non-inplace sort keeps the cell idempotent and returns a fresh 0..n-1 index.
countries_df = countries_df.sort_values(by="country", ascending=True, ignore_index=True)
countries_df

Number of listed countries: 270


Unnamed: 0,id,country,iso2,iso3,flag_icon_path
0,2298f0af-4e56-4980-9db0-7aed25624f91,Afghanistan,AF,AFG,assets/icons/country_flags/af.svg
1,7c9b5b9f-d1f9-47a4-b80c-1a36b54e02fe,Albania,AL,ALB,assets/icons/country_flags/al.svg
2,3624c785-5d1f-4e7c-b650-ab849263d99a,Algeria,DZ,DZA,assets/icons/country_flags/dz.svg
3,34775998-2c61-481f-b45e-512d2c2cbc12,American Samoa,AS,ASM,assets/icons/country_flags/as.svg
4,40230b1d-1529-4d8d-83d6-f9ae58bf45b5,Andorra,AD,AND,assets/icons/country_flags/ad.svg
...,...,...,...,...,...
244,e2950f77-21c5-4e3d-8e5c-4b9d5922ed16,Western Sahara,EH,ESH,assets/icons/country_flags/eh.svg
245,3f9ab77c-1c2b-48e9-ab6b-e240119f4eb4,Yemen,YE,YEM,assets/icons/country_flags/ye.svg
246,a2db17e2-132f-4410-9efb-45127f3c78eb,Zambia,ZM,ZMB,assets/icons/country_flags/zm.svg
247,25b8efa3-7ab8-41f0-a311-d2228d20c482,Zimbabwe,ZW,ZWE,assets/icons/country_flags/zw.svg


In [284]:
# Create dataframe for diets
# "diet" and "description" are parallel lists: entry i of one describes entry i of the other.
# Spelling mistakes in the descriptions were corrected ("unsaturated", "Hunter-gatherer-like",
# "vegetables", "legumes") because this text is exported verbatim to diet.csv.
diet_data = {
    "diet": ["Cholesterol-friendly", "Diabetic", "Dukan", "Hypertension-friendly", "Ketogenic", "Paleo", "Pescetarian", "Vegan", "Vegetarian"],
    "description": [
        "Low intake of LDL or bad cholesterol by reducing saturated or trans fat, higher intake of unsaturated fats such as those found in fish, nuts and olive oil, higher intake of fibre",
        "Low intake of sugar and simple carbohydrates with a focus on whole grains, lean proteins, fruits and vegetables",
        "High-protein, low-carb diet for weight loss",
        "Low sodium and trans-fat intake with a focus on high potassium intake",
        "Low-carb, high-fat diet that puts the body into ketosis where it burns fat for energy instead of glucose",
        "Hunter-gatherer-like diet of our ancestors - unprocessed foods such as nuts, vegetables, fruits and meat, excludes dairy, legumes and grains",
        "Includes fish but excludes other kinds of meat",
        "Excludes not only meat but also animal-derived products such as milk, eggs and honey - strong focus on plant-based foods",
        "Excludes meat but may include other animal-derived products such as milk, eggs and honey - strong focus on plant-based foods"
    ],
}
diet_df = pd.DataFrame(data=diet_data)
# Reproducible ids from the row position and this table's seed (5416), placed as the first column.
diet_df.insert(column="id", loc=0, value=[generate_uuid(i, 5416) for i in range(diet_df.shape[0])])
diet_df

Unnamed: 0,id,diet,description
0,1f655136-7ad1-4b4e-b234-8c36f2519625,Cholesterol-friendly,Low intake of LDL or bad cholesterol by reduci...
1,5cb10763-0638-420e-a2a5-96b0bab23a07,Diabetic,Low intake of sugar and simple carbohydrates w...
2,a25635a4-aca7-443d-81a4-9c4a085609e9,Dukan,"High-protein, low-carb diet for weight loss"
3,3cb69109-86c1-452a-b7a7-1ce507d7e20c,Hypertension-friendly,Low sodium and trans-fat intake with a focus o...
4,e850791f-5c9e-40a5-8ba3-7e0aaa989940,Ketogenic,"Low-carb, high-fat diet that puts the body int..."
5,fe0ca4f7-317d-40c6-b11f-5e0de0435d09,Paleo,Hunter-gather like diet of our ancestors - unp...
6,0b7ad304-2fae-4f1c-b9f1-0ab397f12dbf,Pescetarian,Includes fish but excludes other kinds of meat
7,7c6d03ea-ab8c-4109-b684-cfb401c53859,Vegan,Excludes not only meat but also animal-derived...
8,dc6bfaaf-c9f6-461c-a0e5-2be76e0049d8,Vegetarian,Excludes meat but may include other animal-der...


In [285]:
# Skin type lookup table.
# Each entry pairs a skin type with its description so the two cannot drift out of step.
skin_type_entries = [
    ("Combination", "Oily appearance across the T-zone (forehead, nose and chin) but typically dry across the rest of the skin"),
    ("Dry", "Little moisture causing tight, flaky and dull appearance of the skin"),
    ("Normal", "Balanced skin, not too oily nor too dry"),
    ("Oily", "Excessive sebum production in the skin with shiny appearance, clogged pores and acne"),
    ("Sensitive", "Prone to irritation and redness in response to products"),
]
skin_data = {
    "skin_type": [name for name, _ in skin_type_entries],
    "description": [text for _, text in skin_type_entries],
}
skin_df = pd.DataFrame(data=skin_data)

# Reproducible ids from the row position and this table's seed (1392), placed as the first column.
skin_ids = [generate_uuid(row, 1392) for row in range(len(skin_df))]
skin_df.insert(0, "id", skin_ids)
skin_df

Unnamed: 0,id,skin_type,description
0,71c8e9be-df3e-4f4f-a634-650242d730a9,Combination,"Oily appearance across the T-zone (forehead, n..."
1,fb8aa806-58b5-47eb-810c-233160679941,Dry,"Little moisture causing tight, flaky and dull ..."
2,4e7273a2-72f2-48c8-ad0f-1bb5658f5cdb,Normal,"Balanced skin, not too oily nor too dry"
3,ecec9a70-409f-4841-9a69-3faea939374b,Oily,Excessive sebum production in the skin with sh...
4,34a080c1-be36-4c87-be4d-3a14129bcf1e,Sensitive,Prone to irritation and redness in response to...


In [286]:
# Blood type lookup table.
# Mapping of blood type -> description; dict insertion order fixes the row order.
blood_type_descriptions = {
    "A+": "Has A antigens and Rh factor",
    "A-": "Has A antigens, no Rh factor",
    "AB+": "Has A and B antigens and Rh factor (universal recipient)",
    "AB-": "Has A and B antigens, no Rh factor",
    "B+": "Has B antigens and Rh factor",
    "B-": "Has B antigens, no Rh factor",
    "O+": "No antigens, has Rh factor",
    "O-": "No antigens, no Rh factor (universal donor)",
}
blood_type_data = {
    "blood_type": list(blood_type_descriptions.keys()),
    "description": list(blood_type_descriptions.values()),
}
blood_type_df = pd.DataFrame(data=blood_type_data)

# Reproducible ids from the row position and this table's seed (8902), placed as the first column.
blood_type_ids = [generate_uuid(row, 8902) for row in range(len(blood_type_df))]
blood_type_df.insert(0, "id", blood_type_ids)
blood_type_df

Unnamed: 0,id,blood_type,description
0,f7331ae5-b8ac-4986-8fb1-b298c4252e63,A+,Has A antigens and Rh factor
1,99e702ce-fde9-43f2-8c56-68cfe226e6a4,A-,"Has A antigens, no Rh factor"
2,89e91176-c302-4622-a936-9650005de900,AB+,Has A and B antigens and Rh factor (universal ...
3,4aec66a9-2c8d-447e-81fd-1874afd21ddf,AB-,"Has A and B antigens, no Rh factor"
4,d1b3dacd-3e89-4bc7-93d7-018f1ea71e30,B+,Has B antigens and Rh factor
5,f21668f7-c753-47d7-95ed-a5cd10c592fa,B-,"Has B antigens, no Rh factor"
6,369f1a24-e30e-4b80-8d0b-a539da00df86,O+,"No antigens, has Rh factor"
7,65a4b8f6-194a-4df3-ae03-8a954ed5ee89,O-,"No antigens, no Rh factor (universal donor)"


In [287]:
# Save dataframes as csv files
csv_path = "../Data/csv/"
# exist_ok=True creates the directory (including parents) when missing and does nothing
# otherwise, replacing the separate exists-then-create check.
os.makedirs(csv_path, exist_ok=True)

# Output file stem -> dataframe to export.
csv_exports = {
    "sex": sex_df,
    "countries": countries_df,
    "diet": diet_df,
    "skin": skin_df,
    "blood_type": blood_type_df,
}
for file_stem, frame in csv_exports.items():
    # os.path.join avoids the doubled separator ("../Data/csv//sex.csv") that string
    # concatenation produced with csv_path's trailing slash.
    # index=False leaves the positional row index out of the file.
    frame.to_csv(os.path.join(csv_path, f"{file_stem}.csv"), index=False)