Imports

In [1]:
from qud_utils import createTreeFromFile, checkBigQuestion, checkLeaves
from tree_comparison import compareTwoTrees
from questions import getQuestions, getQuestionType, getQUDStack, createQuestionDF
from q_givenness import getUnknownNouns, getUnknownVerbs

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import spacy

# Functions for analysing qud Trees

In [3]:
def getTextnameFromQudName(qudFilename):
    return "_".join(qudFilename.split(".")[0].split("_")[:2]) + ".txt"

In [4]:
def maxDepth(qudTree):
    return max([len(path) for path in qudTree.paths_to_leaves()])

def minDepth(qudTree):
    return min([len(path) for path in qudTree.paths_to_leaves()])

def avgDepth(qudTree):
    pathList = [len(path) for path in qudTree.paths_to_leaves()]
    return sum(pathList)/len(pathList)
    
def lenQUD(qudTree):
    return len(qudTree.leaves())

In [5]:
df = pd.DataFrame()

qudDirectory = "microText_QUD/"
microtextDirectory = "microText/"
filenames = os.listdir(qudDirectory)

df["filename"] = filenames
df["textname"] = [getTextnameFromQudName(qudFilename) for qudFilename in filenames]
df["qudTree"] = [createTreeFromFile(qudDirectory + filename) for filename in filenames]

for tree in df["qudTree"]:
    assert checkBigQuestion(tree), "The big question is not \' What is the way things are\'"
    
for tree, microTextFilename in zip(df["qudTree"], df["textname"]):
    assert checkLeaves(microtextDirectory + microTextFilename, tree), "The leaves do not represent the text segments"

df["maxDepth"] = [maxDepth(qudTree) for qudTree in df["qudTree"]]
df["minDepth"] = [minDepth(qudTree) for qudTree in df["qudTree"]]
df["avgDepth"] = [avgDepth(qudTree) for qudTree in df["qudTree"]]
df["lenQUD"] = [lenQUD(qudTree) for qudTree in df["qudTree"]]
display(df.head())

Unnamed: 0,filename,textname,qudTree,maxDepth,minDepth,avgDepth,lenQUD
0,micro_b021_andrew.txt,micro_b021.txt,"[Node(tag=What is the way things are?, identif...",6,3,4.5,4
1,micro_b053_tatjana.txt,micro_b053.txt,"[Node(tag=What is the way things are?, identif...",5,3,4.0,6
2,micro_b010_johann.txt,micro_b010.txt,"[Node(tag=What is the way things are?, identif...",5,3,4.0,5
3,micro_b061_luise.txt,micro_b061.txt,"[Node(tag=What is the way things are?, identif...",7,4,5.5,4
4,micro_b006_johann.txt,micro_b006.txt,"[Node(tag=What is the way things are?, identif...",5,3,4.0,5


In [6]:
df["lenQUD"].mean()

6.157894736842105

# Analyse Questions

In [7]:
QuestionDF = pd.DataFrame()

for tree in df["qudTree"]:
    newQuestionDF = createQuestionDF(tree)
    QuestionDF = pd.concat([QuestionDF,newQuestionDF], ignore_index=True)

Should there be tuition fees for studying in Germany?
Should state health insurance cover complementary medicine?
Should alternative treatments be subsidized?
Should penalties for dog dirt be higher?
Are higher penalties pointless in all cases?
Does Germany have the death penalty?
Does anything speak against the reason that a murderer has already decided on the life or death of another person?
Should intelligence services be regulated more tightly by parliament?
Are the disclosures of Edward Snowden relevant to Germany?
Should everyone contribute to the funding of public broadcasters?
Is it fair of landlords to raise the rent when a new tenant moves in?
Could an increase in the rent be justified?
Should the morning-after pill be sold in pharmacies?
Are pharmacists qualified enough to sell the morning-after pill?
Is the quality of the public channels good enough to be paid for?
Has the medicine been effective?
Will it bring damage to the pharma industry?
Will it bring damage to the conv

In [8]:
display(QuestionDF.head(1000))

Unnamed: 0,question,underneathTexts,priviousTexts,qudStack,span,type
0,What is the way things are?,Tuition fees should not be charged in Germany....,,[What is the way things are?],180,Big Question
1,Should there be tuition fees for studying in G...,Tuition fees should not be charged in Germany....,,[Should there be tuition fees for studying in ...,180,Yes/No
2,Why should there be no tuition fees in Germany?,Studying and taking higher degrees must remain...,Tuition fees should not be charged in Germany.,[Why should there be no tuition fees in German...,133,Why
3,What about fees makes studying and taking high...,Fees result in longer durations of studies. Th...,Tuition fees should not be charged in Germany....,[What about fees makes studying and taking hig...,58,What about
4,Why is studying longer a problem?,That's costly!,Tuition fees should not be charged in Germany....,"[Why is studying longer a problem?, What about...",14,Why
5,What is the way things are?,Many people see proven relief of their symptom...,,[What is the way things are?],553,Big Question
6,Should state health insurance cover complement...,Many people see proven relief of their symptom...,,[Should state health insurance cover complemen...,553,Yes/No
7,Why should state health insurance cover comple...,Many people see proven relief of their symptom...,,[Why should state health insurance cover compl...,89,Why
8,Why should state health insurance not cover co...,However there is no substantiated data that th...,Many people see proven relief of their symptom...,[Why should state health insurance not cover c...,256,Why
9,Why is it a problem that complementary medicin...,which means that quacks and phonies can practi...,Many people see proven relief of their symptom...,[Why is it a problem that complementary medici...,91,Why


## Question Types

In [9]:
QuestionDF["type"].value_counts()

What            90
Yes/No          77
Big Question    57
Why             53
How             20
[               18
What about      17
Who              4
So               3
When             3
In               2
Which            2
what             2
Attractive       1
If               1
Whose            1
Where            1
The              1
Example          1
Name: type, dtype: int64

In [10]:
QuestionDF["type"].count()

354

## Q-Givenness

In [11]:
nlp = spacy.load('en')

In [12]:
QuestionDF["unknownNouns"] = [getUnknownNouns(question, text, underneathText, nlp) for question, text, underneathText in zip(QuestionDF["question"], QuestionDF["priviousTexts"], QuestionDF["underneathTexts"])]

Tuition fees should not be charged in Germany. Studying and taking higher degrees must remain a basic right for everyone.

What about fees makes studying and taking higher degrees difficult for some people?

Fees result in longer durations of studies. That's costly!

{'people'}
Many people see proven relief of their symptoms and complaints by complementary medicine.

Why should state health insurance not cover complementary medicine?

However there is no substantiated data that this healing isn't simply due to the placebo effect. Besides many practices in this field are not regulated professions, which means that quacks and phonies can practice these occupations unknown to the patients.

{'insurance'}
Many people see proven relief of their symptoms and complaints by complementary medicine. However there is no substantiated data that this healing isn't simply due to the placebo effect. Besides many practices in this field are not regulated professions,

Why is it a problem that compleme

Health insurance companies should not cover treatment in complementary medicine

Are there exceptions?

unless the promised effect and its medical benefit have been concretely proven.

{'exception'}
Health insurance companies should not cover treatment in complementary medicine unless the promised effect and its medical benefit have been concretely proven.

Is there usually proof behind that?

Yet this very proof is lacking in most cases.

{'proof'}
Health insurance companies should not cover treatment in complementary medicine unless the promised effect and its medical benefit have been concretely proven. Yet this very proof is lacking in most cases.

Does complementart medicine help the patients?

Patients do often report relief of their complaints after such treatments.

{'patient'}
As a central airport Berlin Tegel is particularly attractive for business travellers

What’s a consequence of the airport being attractive for business travellers?

and should by all means remain operati

Rent prices should be limited by a cap when there's a change of tenant.

Why are rents so high?

Landlords may want to earn as much as possible, and many, consistent with market principles, are prepared to pay higher rents,

{'rent'}
Rent prices should be limited by a cap when there's a change of tenant. Landlords may want to earn as much as possible, and many, consistent with market principles, are prepared to pay higher rents, but that people with the same income suddenly must pay more and can't live in the same flat anymore seems implausible.

What does that do with our cities?

Gentrification destroys entire districts and their culture.

{'city'}
Video games, namely FIFA in my case, can fascinate young people for hours more intensively and emotionally than any sport in the world!

What could be a consequence from this?

It would be fair to make them into an Olympic event. Perhaps it is less thrilling for a spectator than for the player, but also for contemporary Olympic events, I r

From time immemorial humans have had complementary medicine,

Does complementary medicine work?

and it has not generally been ineffective.

{'work'}
From time immemorial humans have had complementary medicine, and it has not generally been ineffective.

Should humans use less of other types of medication?

A reduction in the amount of chemically produced medication per person is most certainly desirable. This will do little damage to the pharmaceutical industry, and conventional medicine will also hardly suffer. The patient could learn to be more aware of his or her body and to treat it more healthily.

{'medication', 'type'}
From time immemorial humans have had complementary medicine, and it has not generally been ineffective. A reduction in the amount of chemically produced medication per person is most certainly desirable.

What happens if production of chemical medications is reduced?

This will do little damage to the pharmaceutical industry, and conventional medicine will also h

No human being or human committee should again be given the permission to rule over 'life or death'. Courts are also subject to human error. That's why Germany should not introduce capital punishment! Every human, even those who have committed a despicable crime, can bring themselves to regret and change their opinion.

Should there be a path left open for criminals to change their minds?

A door must remain open for making amends.

{'path', 'mind', 'criminal'}
Secret services must be more tightly regulated

Why must they be more tightly regulated?

in order to prevent excessive surveillance. After all, secret services tend to develop a life of their own (a state within a state), which would be irresponsible for democratic states.

{'-PRON-'}
Secret services must be more tightly regulated in order to prevent excessive surveillance. After all, secret services tend to develop a life of their own (a state within a state), which would be irresponsible for democratic states.

Why must they 

Yes, it's annoying and cumbersome to separate your rubbish properly all the time.

How does waste separation look like?

Three different bin bags stink away in the kitchen and have to be sorted into different wheelie bins.

{'separation'}
Yes, it's annoying and cumbersome to separate your rubbish properly all the time. Three different bin bags stink away in the kitchen and have to be sorted into different wheelie bins.

How is waste disposal in Germany?

But still Germany produces way too much rubbish and too many resources are lost when what actually should be separated and recycled is burnt. We Berliners should take the chance and become pioneers in waste separation!

{'germany', 'disposal'}
Yes, it's annoying and cumbersome to separate your rubbish properly all the time. Three different bin bags stink away in the kitchen and have to be sorted into different wheelie bins. But still Germany produces way too much rubbish and too many resources are lost when what actually should be sepa

In [13]:
QuestionDF["unknownVerbs"] = [getUnknownVerbs(question, text, underneathText, nlp) for question, text, underneathText in zip(QuestionDF["question"], QuestionDF["priviousTexts"], QuestionDF["underneathTexts"])]

Tuition fees should not be charged in Germany. Studying and taking higher degrees must remain a basic right for everyone.

What about fees makes studying and taking higher degrees difficult for some people?

Fees result in longer durations of studies. That's costly!

{'make'}
Many people see proven relief of their symptoms and complaints by complementary medicine.

Why should state health insurance not cover complementary medicine?

However there is no substantiated data that this healing isn't simply due to the placebo effect. Besides many practices in this field are not regulated professions, which means that quacks and phonies can practice these occupations unknown to the patients.

{'cover', 'state'}
Many people see proven relief of their symptoms and complaints by complementary medicine. However there is no substantiated data that this healing isn't simply due to the placebo effect. Besides many practices in this field are not regulated professions, which means that quacks and pho

Health insurance companies should not cover treatment in complementary medicine unless the promised effect and its medical benefit have been concretely proven. Yet this very proof is lacking in most cases.

Does complementart medicine help the patients?

Patients do often report relief of their complaints after such treatments.

{'help', 'complementart'}
As a central airport Berlin Tegel is particularly attractive for business travellers

What’s a consequence of the airport being attractive for business travellers?

and should by all means remain operational. It is an asset for the economy. The negative impact of air traffic however is considerable. Air traffic is extremely environmentally unfriendly. Particularly in Tegel, the residents have been bearing the stress of aircraft noise for decades.

{'’'}
As a central airport Berlin Tegel is particularly attractive for business travellers and should by all means remain operational.

What other factors should be considered when deciding i

Video games, namely FIFA in my case, can fascinate young people for hours more intensively and emotionally than any sport in the world! It would be fair to make them into an Olympic event.

Would this be interesting for someone to watch?

Perhaps it is less thrilling for a spectator than for the player, but also for contemporary Olympic events, I recall curling and 50km cross-country skiing, you have to be more than a freak to follow those more properly than only in the roundup of highlights.

{'watch'}
Video games, namely FIFA in my case, can fascinate young people for hours more intensively and emotionally than any sport in the world! It would be fair to make them into an Olympic event. Perhaps it is less thrilling for a spectator than for the player,

Are Olympic events interesting to watch?

but also for contemporary Olympic events, I recall curling and 50km cross-country skiing, you have to be more than a freak to follow those more properly than only in the roundup of highlights.


Owner-run shops may potentially be overwhelmed by additional work times on Sundays and holidays, then again supermarkets and large shopping centres dominate the market today already. Thus smaller shops could benefit from the additional freedom, as customers are on average more laid-back on Sunday.

Who would benefit from opening shops on Sundays and holidays?

Opening on Sundays and holidays would therefore help both customers and shops.

{'open'}
No human being or human committee should again be given the permission to rule over 'life or death'.

Why shouldn't humans be able to determine the death penalty?

Courts are also subject to human error. That's why Germany should not introduce capital punishment! Every human, even those who have committed a despicable crime, can bring themselves to regret and change their opinion. A door must remain open for making amends.

{'determine'}
No human being or human committee should again be given the permission to rule over 'life or death'. Court

In [14]:
QuestionDF["unknownMaterial"] = [unknownVerbs + unknownNouns for unknownVerbs, unknownNouns in zip(QuestionDF["unknownVerbs"].tolist(), QuestionDF["unknownNouns"].tolist())]

In [15]:
QuestionDF["unknownNouns"].value_counts()

100    150
0       97
1       82
2       21
3        4
Name: unknownNouns, dtype: int64

In [16]:
QuestionDF["unknownVerbs"].value_counts()

100    150
0      132
1       59
2       11
3        2
Name: unknownVerbs, dtype: int64

In [17]:
QuestionDF["unknownMaterial"].value_counts()

200    150
1       81
0       65
2       37
3       16
4        5
Name: unknownMaterial, dtype: int64