In [1]:
import pandas as pd

# One entry per age range: (age range, male percentage, female percentage).
age_gender_pairs = [
    ("0–4", 8.03, 7.76),
    ("5–9", 8.44, 8.23),
    ("10–14", 9.09, 8.58),
    ("15–19", 8.26, 8.12),
    ("20–24", 8.23, 8.21),
    ("25–29", 7.89, 8.04),
    ("30–34", 7.83, 8.04),
    ("35–39", 7.16, 7.63),
    ("40–44", 6.68, 7.02),
    ("45–49", 6.03, 6.30),
    ("50–54", 5.40, 5.76),
    ("55–59", 4.62, 5.12),
    ("60–64", 4.43, 5.07),
    ("65–69", 3.64, 4.91),
    ("70–74", 2.43, 4.46),
    ("75–79", 1.13, 3.97),
    ("80+", 1.03, 4.49),
]

# Expand to long format: a "Male" row followed by a "Female" row per age range.
data = pd.DataFrame(
    [
        [age_range, gender, percentage]
        for age_range, male_pct, female_pct in age_gender_pairs
        for gender, percentage in (("Male", male_pct), ("Female", female_pct))
    ],
    columns=["Age Range", "Gender", "Percentage"],
)

# NOTE(review): the loading cell further down reads "age_gender_distribution.csv",
# not the file written here — confirm which file is meant to feed the grouping step.
data.to_csv("../data/both_parties/age_gender_percentage.csv", index=False)


In [8]:
import pandas as pd

# Read every input table from ../data/both_parties.
# The age/gender table is the only CSV input; the rest are Excel workbooks.
age_gender_df = pd.read_csv("../data/both_parties/age_gender_distribution.csv")

criteria_df = pd.read_excel("../data/both_parties/criterias-hcp.xlsx")
income_df = pd.read_excel("../data/both_parties/income_distribution.xlsx")
percentages_df = pd.read_excel("../data/both_parties/percentages.xlsx")
religion_df = pd.read_excel("../data/both_parties/religion.xlsx")

In [9]:
# Preview the first five rows of the criteria table.
criteria_df.head(n=5)

Unnamed: 0,Indicator,Value/Distribution,Reference/Source,Page
0,Gender ratio,"50.2% Female, 49.8% Male",https://marocpme.gov.ma/wp-content/uploads/202...,Page 15
1,Population,"Urban: 64.8%, Rural: 35.2%",https://marocpme.gov.ma/wp-content/uploads/202...,Page 15
2,Marital Status,"Single: 34.8%, Married: 57.7%, Divorced: 2.2%,...",https://marocpme.gov.ma/wp-content/uploads/202...,Page 40
3,Education Levels,"Literacy Rate (15+): 87% (Men), 74% (Women); P...",https://marocpme.gov.ma/wp-content/uploads/202...,Page 111
4,Urbanization Rate,Urban Population: 64.8% in 2023 (up from 64.3%...,https://marocpme.gov.ma/wp-content/uploads/202...,"Page 15, 28"


In [30]:
# Group the data by Age Range and create the nested structure:
#   [{'Age Range': ..., 'Gender Details': [{'Gender': ..., 'Percentage': ...}, ...]}, ...]
#
# Rows are bucketed in a dict keyed by age range. Dicts keep insertion order,
# so age ranges come out in order of first appearance, and all rows for one
# age range land in a single entry even when they are not adjacent in the
# input. No None "no group yet" sentinel or separate "flush the last group"
# step is needed.
gender_details_by_age_range = {}
for row in age_gender_df.to_dict(orient="records"):
    gender_details_by_age_range.setdefault(row['Age Range'], []).append({
        'Gender': row['Gender'],
        'Percentage': row['Percentage']
    })

gender_distribution = [
    {'Age Range': age_range, 'Gender Details': gender_details}
    for age_range, gender_details in gender_details_by_age_range.items()
]

# Display the result
gender_distribution

[{'Age Range': '15–19',
  'Gender Details': [{'Gender': 'Male', 'Percentage': 8.26},
   {'Gender': 'Female', 'Percentage': 8.12}]},
 {'Age Range': '20–24',
  'Gender Details': [{'Gender': 'Male', 'Percentage': 8.23},
   {'Gender': 'Female', 'Percentage': 8.21}]},
 {'Age Range': '25–29',
  'Gender Details': [{'Gender': 'Male', 'Percentage': 7.89},
   {'Gender': 'Female', 'Percentage': 8.04}]},
 {'Age Range': '30–34',
  'Gender Details': [{'Gender': 'Male', 'Percentage': 7.83},
   {'Gender': 'Female', 'Percentage': 8.04}]},
 {'Age Range': '35–39',
  'Gender Details': [{'Gender': 'Male', 'Percentage': 7.16},
   {'Gender': 'Female', 'Percentage': 7.63}]},
 {'Age Range': '40–44',
  'Gender Details': [{'Gender': 'Male', 'Percentage': 6.68},
   {'Gender': 'Female', 'Percentage': 7.02}]},
 {'Age Range': '45–49',
  'Gender Details': [{'Gender': 'Male', 'Percentage': 6.03},
   {'Gender': 'Female', 'Percentage': 6.3}]},
 {'Age Range': '50–54',
  'Gender Details': [{'Gender': 'Male', 'Percentage':

In [31]:
# JSON helper used to persist the grouped results
import json

def save_to_json(path, data):
    """Serialize ``data`` to a JSON file at ``path``.

    Args:
        path: Destination file path; an existing file is overwritten.
        data: Any object that ``json.dump`` can serialize.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains values that are not JSON serializable.
    """
    # Pin the encoding so the file does not depend on the platform's locale
    # default. json.dump escapes non-ASCII characters by default, so the bytes
    # written are plain ASCII either way.
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(data, file)

In [27]:
# Nest the income table by quintile:
#   [{'Quintile': ..., 'Region Details': [{'Region': ..., 'Income': ...}, ...]}, ...]
#
# Rows are bucketed in a dict keyed by quintile. Dicts keep insertion order,
# so quintiles come out in order of first appearance, and all rows for one
# quintile land in a single entry even when they are not adjacent in the
# input. No None "no group yet" sentinel or separate "flush the last group"
# step is needed.
region_details_by_quintile = {}
for row in income_df.to_dict(orient="records"):
    region_details_by_quintile.setdefault(row['Quintile'], []).append({
        'Region': row['Region'],
        'Income': row['Income (in MAD)']
    })

income_distribution = [
    {'Quintile': quintile, 'Region Details': region_details}
    for quintile, region_details in region_details_by_quintile.items()
]

# Display the result
income_distribution

[{'Quintile': 'First Quintile (20% least wealthy)',
  'Region Details': [{'Region': 'National (in MAD)', 'Income': 5998},
   {'Region': 'Urban  (in MAD)', 'Income': 7286},
   {'Region': 'Rural (in MAD)', 'Income': 4905}]},
 {'Quintile': 'Second Quintile',
  'Region Details': [{'Region': 'National (in MAD)', 'Income': 9852},
   {'Region': 'Urban (in MAD)', 'Income': 11974},
   {'Region': 'Rural (in MAD)', 'Income': 7664}]},
 {'Quintile': 'Third Quintile',
  'Region Details': [{'Region': 'National (in MAD)', 'Income': 13726},
   {'Region': 'Urban (in MAD)', 'Income': 16101},
   {'Region': 'Rural (in MAD)', 'Income': 10337}]},
 {'Quintile': 'Fourth Quintile',
  'Region Details': [{'Region': 'National (in MAD)', 'Income': 20733},
   {'Region': 'Urban (in MAD)', 'Income': 24567},
   {'Region': 'Rural (in MAD)', 'Income': 14252}]},
 {'Quintile': 'Fifth Quintile (20% wealthiest)',
  'Region Details': [{'Region': 'National (in MAD)', 'Income': 57514},
   {'Region': 'Urban (in MAD)', 'Income': 

In [28]:
# Nest the percentages table by indicator:
#   [{'Indicator': ..., 'Category Details': [{'Category': ..., 'Value': ...}, ...]}, ...]
#
# Rows are bucketed in a dict keyed by indicator. Dicts keep insertion order,
# so indicators come out in order of first appearance, and all rows for one
# indicator land in a single entry even when they are not adjacent in the
# input. No None "no group yet" sentinel or separate "flush the last group"
# step is needed.
percentage_details_by_indicator = {}
for row in percentages_df.to_dict(orient="records"):
    percentage_details_by_indicator.setdefault(row['Indicator'], []).append({
        'Category': row['Category'],
        'Value': row['Percentage']
    })

percentages_result = [
    {'Indicator': indicator, 'Category Details': category_details}
    for indicator, category_details in percentage_details_by_indicator.items()
]

# Display the result
percentages_result

[{'Indicator': 'Gender Ratio',
  'Category Details': [{'Category': 'Female (%)', 'Value': 50.2},
   {'Category': 'Male (%)', 'Value': 49.8}]},
 {'Indicator': 'Population',
  'Category Details': [{'Category': 'Urban (%)', 'Value': 64.8},
   {'Category': 'Rural (%)', 'Value': 35.2}]},
 {'Indicator': 'Marital Status',
  'Category Details': [{'Category': 'Single (%)', 'Value': 34.8},
   {'Category': 'Widowed (%)', 'Value': 5.4},
   {'Category': 'Married (%)', 'Value': 57.7},
   {'Category': 'Divorced (%)', 'Value': 2.2}]},
 {'Indicator': 'Education Levels',
  'Category Details': [{'Category': 'Literacy Rate (Men, %)', 'Value': 87.0},
   {'Category': 'Literacy Rate (Women, %)', 'Value': 74.0},
   {'Category': 'Primary School Enrollment (%)', 'Value': 94.7}]},
 {'Indicator': 'Urbanization Rate',
  'Category Details': [{'Category': 'Urban Population (%)', 'Value': 64.8},
   {'Category': 'Casablanca-Settat (%)', 'Value': 77.8}]},
 {'Indicator': 'Height',
  'Category Details': [{'Category': 'Ma

In [29]:
# Nest the religion table by indicator:
#   [{'Indicator': ..., 'Category Details': [{'Category': ..., 'Population': ...}, ...]}, ...]
#
# Rows are bucketed in a dict keyed by indicator. Dicts keep insertion order,
# so indicators come out in order of first appearance, and all rows for one
# indicator land in a single entry even when they are not adjacent in the
# input. No None "no group yet" sentinel or separate "flush the last group"
# step is needed.
religion_details_by_indicator = {}
for row in religion_df.to_dict(orient="records"):
    religion_details_by_indicator.setdefault(row['Indicator'], []).append({
        'Category': row['Category'],
        'Population': row['Number']
    })

religion_result = [
    {'Indicator': indicator, 'Category Details': category_details}
    for indicator, category_details in religion_details_by_indicator.items()
]

# Display the result
religion_result

[{'Indicator': 'Religious Affiliation',
  'Category Details': [{'Category': 'Sunni Muslims (%)',
    'Population': 'approximately 36.4 million people (99% of 36.7 million)'},
   {'Category': 'Shia Muslims (%)',
    'Population': 'Estimated at around 36,700 people (0.1% of 36.7 million)'},
   {'Category': 'Jews (People)', 'Population': 'Estimated at 3,500 people'},
   {'Category': 'Christians (Estimate)',
    'Population': 'Estimated at between 1,500 and 30,000 people'},
   {'Category': 'Baha’is and Other Groups',
    'Population': 'Less than 1% of the population'}]}]

In [34]:
# Map each output file stem to the grouped result it should contain.
data = {
    "gender_distribution": gender_distribution,
    "income_distribution": income_distribution,
    "percentages_result": percentages_result,
    "religion_result": religion_result,
}

# Write every result to ./<stem>.json, relative to the current working directory.
for name in data:
    save_to_json(f"./{name}.json", data[name])