# Basic program analysis
---

This notebook tries out some basic data analysis on electoral programs stored in Markdown format.

## Setup

### Import libraries

In [None]:
import os
from ipywidgets import interact

In [None]:
# Move the working directory up one level, relative to wherever it currently is.
# NOTE: this is not idempotent - re-running the cell moves up one more level each time.
# NOTE(review): presumably needed so the `utils` package imported below resolves - confirm.
os.chdir("..")

In [None]:
from utils.data_utils import DATA_DIR, load_yaml_file, load_markdown_file
from utils.nlp_utils import get_word_cloud, get_sentences, get_words

### Parameters

Get the party names from the data:

In [None]:
# Placeholders for the notebook-wide selections; the interactive widgets below fill them in.
data_name = party_data = party_names = selected_party = None

In [None]:
# Names of the available data sets: each one is a sub-folder of DATA_DIR.
# Hidden entries (e.g. ".DS_Store") and plain files are skipped, because every data set is
# later opened as a folder (DATA_DIR/<name>/...). The names are sorted so the selection
# widget keeps a stable order, since os.listdir returns entries in arbitrary order.
data_names = sorted(
    name
    for name in os.listdir(DATA_DIR)
    if not name.startswith(".") and os.path.isdir(os.path.join(DATA_DIR, name))
)

In [None]:
@interact
def set_data(data=data_names):
    """Record the chosen data set and load its party metadata.

    Stores the selection in the module-level ``data_name``, loads
    ``parties_data.yml`` from that data set's folder into ``party_data`` and
    keeps the keys of the loaded data in ``party_names``.
    """
    global data_name, party_data, party_names

    data_name = data
    parties_file = os.path.join(DATA_DIR, data_name, "parties_data.yml")
    party_data = load_yaml_file(parties_file)
    party_names = [name for name in party_data.keys()]

Select a party:

In [None]:
@interact
def select_party(party=party_names):
    """Store the chosen party in the module-level ``selected_party``.

    The ``party_names`` default is evaluated once, when this cell is executed,
    so the cell has to be re-run after a different data set is selected for
    the offered parties to follow that change.
    """
    global selected_party
    selected_party = party

Get the topics to analyse:

In [None]:
# Load `topics.yml` from the selected data set's folder (requires `data_name` to be set).
# It is used below as a mapping from each topic name to the words that identify it.
topics = load_yaml_file(os.path.join(DATA_DIR, data_name, "topics.yml"))

In [None]:
# Show the names of the loaded topics
topics.keys()

### Load data

In [None]:
# Load the selected party's program, stored as programs/<party>.md inside the selected
# data set's folder (requires both `data_name` and `selected_party` to be set).
program_txt = load_markdown_file(os.path.join(DATA_DIR, data_name, "programs", f"{selected_party}.md"))

In [None]:
# Preview the first 100 characters of the program text
program_txt[:100]

## Analysis

### Key words

Get the sentences:

In [None]:
# Extract the sentences of the program with the project helper from utils.nlp_utils.
# The cells below treat entries starting with "#" as Markdown headers.
sentences = get_sentences(program_txt)

In [None]:
# Preview the first ten sentences
sentences[:10]

Get the words:

In [None]:
# Extract the words of the program with the project helper from utils.nlp_utils
words = get_words(program_txt)

In [None]:
# Preview the first ten words
words[:10]

Generate a word cloud:

In [None]:
# Build the word cloud from the extracted words (project helper from utils.nlp_utils)
get_word_cloud(words)

### Headers and subheaders

In [None]:
# Show the header outline: every leading "#" of a Markdown header becomes three spaces of
# indentation, so deeper headers appear further to the right. Only the leading run is
# converted, so "#" characters inside the header text itself are kept as written.
[
    "   " * (len(s) - len(s.lstrip("#"))) + s.lstrip("#")
    for s in sentences
    if s.startswith("#")
]

In [None]:
# Keep only the first-level headers, i.e. the sentences that start with "# "
main_headers = list(filter(lambda s: s.startswith("# "), sentences))
main_headers

### Category count

#### Topics

In [None]:
# For every topic, collect the sentences that contain at least one of the topic's words.
# Matching is a plain, case-sensitive substring test, and a sentence can be listed under
# several topics.
topical_sentences = {topic: [] for topic in topics}
for sentence in sentences:
    for topic, topical_words in topics.items():
        if any(word in sentence for word in topical_words):
            topical_sentences[topic].append(sentence)

In [None]:
# Report, for each topic, how many sentences were collected for it
for topic in topics:
    print(f"Count of sentences on topic '{topic}': {len(topical_sentences[topic])}")

In [None]:
# Total number of sentences in the program, as a reference for the per-topic counts above
len(sentences)

#### Rationality vs intentionality