In [118]:
import pandas as pd
from pprint import pprint
import matplotlib.pyplot as plt
from adjustText import adjust_text
from matplotlib import font_manager

In [135]:
# Load the spreadsheet of coded indicators; later cells read its
# 'Code 1', 'Code 2' and 'Total' columns.
df = pd.read_excel(io="/blagajpy.xlsx")

In [86]:
# Running sum of 'Total' per subcategory code, keyed by the code value.
categories = {}

for _, row in df.iterrows():  # one indicator per row
    # An indicator carries up to two codes; a NaN cell means that slot is empty.
    for code_column in ('Code 1', 'Code 2'):
        code = row[code_column]
        if pd.isna(code):
            continue
        # Credit the indicator's whole 'Total' to this code (starting from 0
        # the first time the code is seen).
        categories[code] = categories.get(code, 0) + row['Total']

In [89]:
# Same mapping as `categories`, re-inserted in descending order of summed total.
sorted_subcategories = dict(
    sorted(
        categories.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
)

In [128]:
# One empty bucket per top-level category code, keyed by the strings '1'..'22'.
grouped_categories = {str(code): [] for code in range(1, 23)}

In [129]:
# File every subcategory under its parent category: the parent is the text
# before the first '.' in the subcategory code (the whole code if it has no '.').
for subcat, votes in sorted_subcategories.items():
    parent = str(subcat).partition(".")[0]

    # Codes whose parent is '0' are left out of the grouping.
    if parent != '0':
        grouped_categories[parent].append({subcat: votes})

In [169]:
# Display names of the top-level categories, listed in code order: the entry at
# index i is used as the axis label for category code i + 1 (the plotting cells
# index this list with `code - 1`).
# NOTE: two entries end with a trailing space; they are runtime strings and are
# kept exactly as written.
category_names = [
    'Integration and acceptance',
    'Tranquility/security ',
    'Public space',
    'Criminality and Legal Enforcement',
    'Police',
    'Social Relations',
    'Legacies of War',
    'Nationalism',
    'Governance',
    'Social Programs / Social Services',
    'Socializing',
    'Sport',
    'Media',
    'Religion',
    'Youth',
    'Drugs & Alcohol',
    'Infrastructure',
    'Environment',
    'Education',
    'Business',
    'Motor Vehicles ',
    'Gender'
]

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(25, 25))

# One row per top-level category, at integer y positions 0..N-1 in the order of
# `grouped_categories`. Using numeric positions (rather than the string keys)
# puts lines, markers, tick labels and text offsets on one explicit scale.
row_positions = list(range(len(grouped_categories)))

# empty horizontal lines (fixed x extent 0..120)
ax.hlines(y=row_positions, color='gray', xmin=0, xmax=120)

texts = []
for row_y, subs in zip(row_positions, grouped_categories.values()):
    for s in subs:
        # each entry is a single-item dict {subcategory code: summed total}
        subcat, votes = next(iter(s.items()))

        # draw marker
        ax.scatter(y=row_y, x=votes, s=150, marker='|', color='black')

        # write the subcategory code slightly left of the marker and just
        # above its row line
        texts.append(ax.text(float(votes) - 1, row_y + 0.2, subcat, rotation=90))

# write category names as y-axis ticks; the tick positions are fixed first so
# that every label is attached to its own row
ax.set_yticks(row_positions)
ax.set_yticklabels(category_names)

# let adjustText move the collected labels before the figure is written out
adjust_text(texts)
fig.savefig("DotPlot_all.pdf", bbox_inches='tight')

In [212]:
# Zero-based positions (category code minus one) of the five categories that
# get their own figure.
top5cats = [code - 1 for code in (1, 8, 11, 17, 20)]

In [237]:
# Global matplotlib settings; they apply to figures drawn after this cell runs.
plt.rcParams.update({
    'font.size': 14,
    'text.usetex': False,
    # 42 selects TrueType (Type 42) font embedding in PDF output.
    'pdf.fonttype': 42,
})

# Register the font file with matplotlib's font manager and read back its
# family name so it can be selected through rcParams.
font_path = '/helvetica.ttf'
font_manager.fontManager.addfont(font_path)
prop = font_manager.FontProperties(fname=font_path)

# Use the registered font as the sans-serif face. The family is set exactly
# once, here, so there is no conflicting "serif" setting to be overridden.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': prop.get_name(),
})

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(14, 5))

# Rows to draw, bottom to top: the (category code, subcategory entries) pairs
# of `grouped_categories` whose zero-based index is listed in `top5cats`.
selected_rows = [
    (code, subs)
    for code, subs in grouped_categories.items()
    if int(code) - 1 in top5cats
]
# Explicit integer y position per row. Lines, markers, labels and ticks all use
# these positions, so a row stays aligned even if it has no subcategories.
row_positions = list(range(len(selected_rows)))

# empty horizontal lines (fixed x extent 0..120)
ax.hlines(y=row_positions, color='gray', xmin=0, xmax=120)

texts = []
for row_y, (code, subs) in zip(row_positions, selected_rows):
    for s in subs:
        # each entry is a single-item dict {subcategory code: summed total}
        subcat, votes = next(iter(s.items()))

        # draw marker
        ax.scatter(y=row_y, x=votes, s=150, marker='|', color='black')

        # write the subcategory code slightly left of the marker and just
        # above its row line
        texts.append(ax.text(float(votes) - 1, row_y + 0.2, subcat, rotation=90))

# write category names as y-axis ticks: positions are fixed first, and the
# names are looked up from the same rows that were drawn, in the same order,
# so each label names the row it sits next to
ax.set_yticks(row_positions)
ax.set_yticklabels([category_names[int(code) - 1] for code, _ in selected_rows])

# adjust_text is not applied to this figure; `texts` is still collected so that
# adjust_text(texts) can be called before saving if label overlap needs fixing.
fig.savefig('dotplot_top5.pdf', bbox_inches='tight')