In [1]:
import pandas as pd

# Loading the datasets provided
alldata = pd.read_excel('alldata.xlsx')
drdata = pd.read_excel('drdata.xlsx')
drq = pd.read_excel('drq.xlsx')
tv2data = pd.read_excel('tv2data.xlsx')
tv2q = pd.read_excel('tv2q.xlsx')
electeddata = pd.read_excel('electeddata.xlsx')

# Quick overview of the dataset
{
    "alldata": alldata.head(),
    "drdata": drdata.head(),
    "drq": drq.head(),
    "tv2data": tv2data.head(),
    "tv2q": tv2q.head(),
    "electeddata": electeddata.head()
}


{'alldata':    530  531  533  534  535  537  538  540  541  543  ...  9a  9b  10a  10b  \
 0   -1   -2    1   -2    2    1   -2    1    1    2  ...   2   0    1   -2   
 1    2    2   -1   -2   -1   -2    1   -2    2   -2  ...  -2   0   -1    2   
 2    2    1   -2   -2    1   -2    1   -1    1   -1  ...  -1  -2    0    2   
 3    2    1   -2   -1    1    1    1    1    1   -2  ...  -2   2    2    2   
 4    1    1   -2    2   -2    1   -2    1    2   -2  ...  -2   0   -1    0   
 
    11a  11b  12a  12b               storkreds  alder  
 0   -2    1    1    1    Københavns Storkreds     78  
 1    1   -2    0    0          Fyns Storkreds     64  
 2    0   -1    1   -2     Bornholms Storkreds     37  
 3    2   -1    2    0  Nordjyllands Storkreds     28  
 4   -2    0    2   -2    Københavns Storkreds     58  
 
 [5 rows x 53 columns],
 'drdata':    530  531  533  534  535  537  538  540  541  543  ...  552  553  555  556  \
 0   -1   -2    1   -2    2    1   -2    1    1    2  ...   

In [3]:
### Group candidates by party and age

# Grouping the candidates by party and getting the average and distribution of age
age_by_party = electeddata.groupby('storkreds')['alder'].agg(['mean', 'min', 'max', 'count']).reset_index()

# Display 
print("Age Distribution by Party:")
print(age_by_party)


Age Distribution by Party:
                     storkreds       mean  min  max  count
0          Bornholms Storkreds  46.500000   37   56      2
1               Fyns Storkreds  42.437500    0   67     16
2  Københavns Omegns Storkreds  43.933333   29   67     15
3         Københavns Storkreds  42.750000    0   74     20
4       Nordjyllands Storkreds  49.684211   31   68     19
5      Nordsjællands Storkreds  40.642857   28   57     14
6          Sjællands Storkreds  43.260870    0   75     23
7        Sydjyllands Storkreds  48.318182   21   69     22
8       Vestjyllands Storkreds  47.642857   32   59     14
9        Østjyllands Storkreds  45.041667   26   60     24


In [4]:
### Identify the most confident candidate

# +2 and -2 responses
strong_responses = electeddata.iloc[:, :-2]  # Removed storkreds and age columns

# Count of responses per cand
confident_responses = ((strong_responses == 2) | (strong_responses == -2)).sum(axis=1)

# the prop of confident resp
total_responses = strong_responses.shape[1]  # Total number of responses foe cand
confidence_ratio = confident_responses / total_responses

# Adding ratio to original elected data
electeddata['confidence_ratio'] = confidence_ratio

# Sorting cand by confidence 
most_confident_candidates = electeddata[['storkreds', 'alder', 'confidence_ratio']].sort_values(by='confidence_ratio', ascending=False).head(10)


print("Most Confident Candidates:")
print(most_confident_candidates)

Most Confident Candidates:
                   storkreds  alder  confidence_ratio
7      Østjyllands Storkreds     28          0.764706
45    Nordjyllands Storkreds     55          0.745098
1      Østjyllands Storkreds     58          0.725490
66            Fyns Storkreds     31          0.686275
74            Fyns Storkreds     42          0.686275
28      Københavns Storkreds     35          0.686275
118      Sjællands Storkreds      0          0.686275
145  Nordsjællands Storkreds     38          0.666667
127      Sjællands Storkreds     32          0.666667
126      Sjællands Storkreds     59          0.666667


In [5]:
### Differences in responses between candidates, both inter-party and intra-party

numeric_columns = electeddata.select_dtypes(include=['number']).columns
numeric_responses = electeddata[numeric_columns]

# Intra-party numeric vals 
party_disagreement = numeric_responses.groupby(electeddata['storkreds']).std().mean(axis=1).sort_values(ascending=False)

# Inter-party 
party_mean = numeric_responses.groupby(electeddata['storkreds']).mean().mean(axis=1)

# Merging intra- and inter-party 
party_analysis_corrected = pd.DataFrame({
    'Intra-party disagreement (std)': party_disagreement,
    'Inter-party position (mean)': party_mean
}).sort_values(by='Intra-party disagreement (std)', ascending=False)

print("Intra- and Inter-party Analysis:")
print(party_analysis_corrected)


Intra- and Inter-party Analysis:
                             Intra-party disagreement (std)  \
storkreds                                                     
Fyns Storkreds                                     1.582088   
Sjællands Storkreds                                1.537654   
Københavns Storkreds                               1.503397   
Østjyllands Storkreds                              1.482646   
Nordjyllands Storkreds                             1.461635   
Sydjyllands Storkreds                              1.449432   
Københavns Omegns Storkreds                        1.403364   
Nordsjællands Storkreds                            1.374115   
Vestjyllands Storkreds                             1.227652   
Bornholms Storkreds                                1.069496   

                             Inter-party position (mean)  
storkreds                                                 
Fyns Storkreds                                  0.969795  
Sjællands Storkreds              