In [1]:
# Colab-only step: ask the user to upload a file so the input CSV is available in the runtime.
# NOTE(review): `google.colab` is presumably only importable inside Google Colab —
# confirm before running this notebook in another environment.
from google.colab import files
uploaded = files.upload()

Saving OSUN_crosschecked (2).csv to OSUN_crosschecked (2).csv


In [2]:
pip install pandas geopy scipy




# Steps to Detect Outliers in Election Data Using Geospatial Analysis
**1. Import Libraries and Load Data**

In [3]:
import pandas as pd
from scipy.spatial import distance

# Read the uploaded CSV into the DataFrame that every later cell works on
input_csv = 'OSUN_crosschecked (2).csv'
osun_data = pd.read_csv(input_csv)


**2. Calculate Pairwise Distances**

In [4]:
import numpy as np
from scipy.spatial.distance import pdist, squareform

# Extract the coordinates as an (n_units, 2) array of [Latitude, Longitude].
# NOTE(review): assumes both columns are decimal degrees — confirm against the CSV.
coordinates = osun_data[['Latitude', 'Longitude']].values

# A degree of longitude covers less ground than a degree of latitude (by a factor of
# cos(latitude)), so plain Euclidean distance on raw degrees overstates east-west
# separations. Scale longitude by cos(mean latitude) (equirectangular approximation)
# so both axes are expressed in "latitude-degree" units, where 1 degree ~ 111 km.
mean_latitude_rad = np.radians(np.nanmean(coordinates[:, 0].astype(float)))
projected_coordinates = np.column_stack([
    coordinates[:, 0].astype(float),
    coordinates[:, 1].astype(float) * np.cos(mean_latitude_rad),
])

# Condensed pairwise distances, then the full symmetric (n_units x n_units) matrix
distances = pdist(projected_coordinates, 'euclidean')
distance_matrix = squareform(distances)


**3. Define Radius and Find Neighbours**

In [6]:
import numpy as np
# Search radius in degrees, using the rough conversion 1 degree ~ 111 km
# (so 1.0 / 111.0 corresponds to about 1 km)
radius_degrees = 1.0 / 111.0

# For each unit, gather the positional indices of every other unit whose
# distance is within the radius; the unit's own index is left out
neighbours_list = [
    [int(other) for other in np.flatnonzero(row_distances <= radius_degrees) if other != unit_index]
    for unit_index, row_distances in enumerate(distance_matrix)
]

# Store each unit's neighbour indices alongside its row
osun_data['Neighbours'] = neighbours_list


**4. Calculate Outlier Scores**

In [7]:
# Function to calculate outlier scores for each party
def calculate_outlier_score(row, neighbours, data, parties=('APC', 'LP', 'PDP', 'NNPP')):
    """Score how far a unit's votes deviate from the average of its neighbours.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        The unit being scored; must be indexable by each party name.
    neighbours : list of int
        Positional (``iloc``) indices into ``data`` identifying the neighbours.
    data : pandas.DataFrame
        Frame holding one column per party.
    parties : iterable of str, optional
        Party columns to score. Defaults to the four parties used by this
        notebook, so existing three-argument calls behave as before.

    Returns
    -------
    dict
        Maps each party to ``abs(row[party] - mean of neighbours' votes)``.
        When ``neighbours`` is empty the mean is NaN, so the score is NaN.
    """
    # Select the neighbour rows once instead of once per party
    neighbour_rows = data.iloc[neighbours]
    return {
        party: abs(row[party] - neighbour_rows[party].mean())
        for party in parties
    }

# Score every unit against its own neighbour list; the new column holds,
# per row, the dict returned by calculate_outlier_score
def _score_against_neighbours(row):
    return calculate_outlier_score(row, row['Neighbours'], osun_data)

osun_data['Outlier_Scores'] = osun_data.apply(_score_against_neighbours, axis=1)


**5. Flatten Outlier Scores and Sort Data**

In [8]:
# Give each party its own score column, pulled out of the per-row score dicts
score_columns = []
for party in ['APC', 'LP', 'PDP', 'NNPP']:
    column_name = f'Outlier_Score_{party}'
    osun_data[column_name] = osun_data['Outlier_Scores'].apply(lambda scores: scores[party])
    score_columns.append(column_name)

# Rank units by their largest per-party score, highest first
osun_data['Max_Outlier_Score'] = osun_data[score_columns].max(axis=1)
sorted_data = osun_data.sort_values(by='Max_Outlier_Score', ascending=False)



**6. Prepare Final Data for Report and Save to CSV**


In [9]:
# Select relevant columns for the final report
report_columns = [
    'PU-Code', 'PU-Name', 'LGA', 'Ward', 'Latitude', 'Longitude',
    'Outlier_Score_APC', 'Outlier_Score_LP', 'Outlier_Score_PDP', 'Outlier_Score_NNPP',
    'Max_Outlier_Score',
]
final_data = sorted_data[report_columns]

# Save to a CSV file in the current working directory.
# The previous hard-coded '\\Users\\Kingsley\\Desktop\\...' path was tied to one Windows
# machine; on a POSIX runtime backslashes are not path separators, so it would create
# a single oddly named file instead of writing to a Desktop folder.
output_csv = 'sorted_outlier_scores.csv'
final_data.to_csv(output_csv, index=False)
