In [2]:
import pandas as pd
import numpy as np

In [7]:
# Input locations, relative to the directory the notebook is run from.
file_path_1, file_path_2 = 'data/Poll_details.csv', 'data/Composition.csv'

# Read both CSV files with pandas' default parsing options:
# file_path_1 -> poll_data, file_path_2 -> composition_data.
poll_data, composition_data = map(pd.read_csv, (file_path_1, file_path_2))


In [4]:
# Coerce the race shares to numbers; entries that cannot be parsed become NaN
# and are dropped together with their label below.
composition_data['Race_composition'] = pd.to_numeric(composition_data['Race_composition'], errors='coerce')


def _paired_weights(frame, label_col, value_col):
    """Map each label in ``label_col`` to the value on the same row of ``value_col``.

    Rows where either the label or the value is missing are dropped as a unit.
    Dropping NaNs from the two columns independently (and then zipping them)
    would shift the remaining entries against each other whenever the gaps are
    on different rows, silently attaching weights to the wrong labels.

    Args:
        frame: DataFrame holding both columns.
        label_col: name of the column providing the dict keys.
        value_col: name of the column providing the dict values.

    Returns:
        dict of label -> value, in row order (a repeated label keeps its last value).
    """
    pairs = frame[[label_col, value_col]].dropna()
    return dict(zip(pairs[label_col], pairs[value_col]))


race_weights = _paired_weights(composition_data, 'Race', 'Race_composition')
age_weights = _paired_weights(composition_data, 'Age Group', 'Composition')
province_weights = _paired_weights(composition_data, 'Province', 'Province_population')


In [5]:
# The composition table is keyed by the bands '15-59' and '60+', while the poll
# columns are 'Age 18-29', 'Age 30-59' and 'Age 60+'. The '15-59' weight is
# therefore divided between the two younger poll bands.
# NOTE(review): the even split is a modelling assumption, not something derived
# from the data - confirm or replace with real 18-29 / 30-59 shares.
YOUNG_ADULT_SHARE = 0.5  # fraction of the '15-59' weight given to 'Age 18-29'

# Guard so that re-running this cell is harmless: once age_weights already uses
# the poll labels, looking up '15-59' / '60+' again would find nothing and
# silently replace every age weight with 0.
if 'Age 60+' not in age_weights:
    age_weights = {
        'Age 18-29': age_weights.get('15-59', 0) * YOUNG_ADULT_SHARE,
        'Age 30-59': age_weights.get('15-59', 0) * (1 - YOUNG_ADULT_SHARE),
        'Age 60+': age_weights.get('60+', 0)
    }


def normalize_weights(weights):
    """Rescale a mapping of weights so that its values sum to 1.

    Args:
        weights: dict mapping a label to a numeric weight.

    Returns:
        A new dict with the same keys, each value divided by the sum of all
        values. An empty dict is returned when that sum is zero (which includes
        the case of an empty input), since no rescaling is possible.
    """
    total = sum(weights.values())
    if not total:
        return {}

    normalized = {}
    for label, weight in weights.items():
        normalized[label] = weight / total
    return normalized

# Put the three weight tables on a common scale: after this each one sums to 1
# (or is empty if its weights added up to zero).
race_weights, age_weights, province_weights = [
    normalize_weights(raw) for raw in (race_weights, age_weights, province_weights)
]

# Poll column names, grouped by the demographic dimension they belong to.
# predict_win_percentage uses these lists to select columns from a candidate's row.
categories = dict(
    Age=['Age 18-29', 'Age 30-59', 'Age 60+'],
    Economic=['Poor', 'Middle', 'Better Off'],
    # The escaped backslash is part of the column name itself.
    Ethnicity=['Sinhala', 'SL Tamil', 'Estate\\Indian Tamil', 'Muslim'],
    Province=[
        'Western', 'Central', 'Southern',
        'Northern', 'Eastern', 'North-Western',
        'North Central', 'Uva', 'Sabaragamuwa',
    ],
)

def calculate_weighted_average(candidate_data, weights):
    """Weighted mean of the entries of ``candidate_data`` that have a weight.

    Args:
        candidate_data: labelled values exposing ``.index`` and ``[label]``
            lookup (a pandas Series in this notebook).
        weights: dict mapping a label to its weight.

    Returns:
        sum(value * weight) / sum(weight), taken over the labels present in
        both inputs. Labels without a weight are ignored, so the result is
        renormalised over whatever overlap exists. Returns 0 when the
        overlapping weights add up to zero (including when nothing overlaps).
    """
    weighted_sum = 0
    total_weight = 0
    for cat in candidate_data.index:
        if cat not in weights:
            continue
        cat_weight = weights[cat]
        weighted_sum += candidate_data[cat] * cat_weight
        total_weight += cat_weight

    if not total_weight:
        return 0
    return weighted_sum / total_weight

def predict_win_percentage(candidate):
    """Combine one candidate's poll figures into a single score.

    Reads the module-level ``poll_data``, ``categories`` and the three weight
    dicts (``age_weights``, ``race_weights``, ``province_weights``).

    The score is the plain mean of four per-dimension averages:
      * Age, Ethnicity and Province: weighted averages of the candidate's poll
        columns, using the corresponding weight dict;
      * Economic: an unweighted mean of the economic columns, because no
        economic weights are used here.
    Each dimension therefore contributes equally to the final figure.

    Args:
        candidate: value to match against the 'Candidate' column of ``poll_data``.

    Returns:
        The mean of the four per-dimension averages.

    Raises:
        IndexError: if no row of ``poll_data`` has this candidate.
    """
    matches = poll_data[poll_data['Candidate'] == candidate]
    if matches.empty:
        # Same exception type that .iloc[0] would raise on an empty frame,
        # but with a message that says which candidate was not found.
        raise IndexError(f"no poll row found for candidate {candidate!r}")
    # If a candidate appears more than once, only the first row is used.
    candidate_data = matches.iloc[0]

    weighted_dimensions = (
        ('Age', age_weights),
        ('Ethnicity', race_weights),
        ('Province', province_weights),
    )
    category_averages = [
        calculate_weighted_average(candidate_data[categories[category]], cat_weights)
        for category, cat_weights in weighted_dimensions
    ]

    economic_avg = np.mean(candidate_data[categories['Economic']])
    category_averages.append(economic_avg)

    return np.mean(category_averages)

In [6]:
# Score every candidate listed in the poll table.
candidates = poll_data['Candidate'].tolist()
win_percentages = dict(zip(candidates, map(predict_win_percentage, candidates)))

# Rank as (candidate, score) pairs, highest score first.
sorted_candidates = [
    (name, win_percentages[name])
    for name in sorted(win_percentages, key=win_percentages.get, reverse=True)
]

print("\nCandidates with predicted win percentages:")
for candidate, percentage in sorted_candidates:
    print(f"{candidate}: {percentage}")

# The top-ranked entry is reported as the predicted winner.
winner = sorted_candidates[0][0]
print(f"\nPredicted winner: {winner} with {sorted_candidates[0][1]}")


Candidates with predicted win percentages:
Anura Kumara: 38.16760965109341
Sajith Premadaasa: 30.51696484281412
Ranil Wickremasinghe: 27.71703189150532
Namal Rajapaksha: 3.5569497643197763

Predicted winner: Anura Kumara with 38.16760965109341
