# ⛑️ Exploratory Data Analysis

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](<https://colab.research.google.com/>)

# 1. ⚙️ Imports

Import the necessary libraries and packages.

In [None]:
import pandas as pd
from urllib.request import urlopen
import json
import plotly.express as px

# Local Libraries
from data import data

# Data

In [None]:
# Ask the project's `data` helper for the location of the state-by-party
# presidential election dataset.
# NOTE(review): the meaning of each positional argument is defined by
# `data.get_dataset_path` — presumably source, stage, name, version; confirm.
state_vs_party_dataframe_path = data.get_dataset_path(
    "rbs",
    "processed",
    "us_presidential_elections_by_state_and_party",
    1,
)

In [None]:
# Load the election results CSV into a DataFrame.
state_vs_party_dataframe = pd.read_csv(
    filepath_or_buffer=state_vs_party_dataframe_path,
)

In [None]:
# Preview the first rows of the table exactly as loaded from the CSV.
state_vs_party_dataframe.head()

# Remove the first row and rename the first column from Year to State

In [None]:
# Discard the first data row, then renumber the remaining rows from zero.
# NOTE: re-running this cell drops one more row each time.
state_vs_party_dataframe = (
    state_vs_party_dataframe
    .iloc[1:]
    .reset_index(drop=True)
)
state_vs_party_dataframe.head()

In [None]:
# The column labelled "Year" is relabelled "State"; later cells look its
# values up by state name.
state_vs_party_dataframe = state_vs_party_dataframe.rename(columns={"Year": "State"})

state_vs_party_dataframe.head()

In [None]:
# Display the whole cleaned table (notebook rich output).
state_vs_party_dataframe

# Create a party map for a single year

In [None]:
# Download the US states GeoJSON that provides the map geometry.
# Despite the `countries_` prefix, the features here are US states.
with urlopen(
    "https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json"
) as response:
    countries_geojson = json.loads(response.read())

# Lookup from each feature's `properties.name` to its feature `id`
# (presumably the state's FIPS code — confirm against the GeoJSON).
countries_fips = dict(
    (feature["properties"]["name"], feature["id"])
    for feature in countries_geojson["features"]
)

In [None]:
# Inspect the name -> feature-id lookup built from the GeoJSON.
countries_fips

In [None]:
# Attach the GeoJSON feature id for every state; a state name missing from
# the lookup raises KeyError.
state_vs_party_dataframe["Fips"] = [
    countries_fips[state] for state in state_vs_party_dataframe["State"]
]

In [None]:
# Preview the table now that the "Fips" column has been added.
state_vs_party_dataframe.head()

In [None]:
# Two-column frame for the single-year map: the feature id and the party
# recorded in the "2024" column.
map_df = state_vs_party_dataframe[["Fips", "2024"]].rename(columns={"2024": "Party"})

In [None]:
# Preview the first rows of the single-year map input.
map_df.head()

In [None]:
# Fixed colors for the two parties named here; any other party value falls
# back to plotly's default color sequence.
party_color_map = dict(Republican="red", Democratic="blue")

In [None]:
# Draw the single-year map: one colored region per state, matched to the
# GeoJSON features through the "Fips" column.
fig = px.choropleth(
    data_frame=map_df,
    geojson=countries_geojson,
    locations="Fips",
    scope="usa",
    color="Party",
    color_discrete_map=party_color_map,
)

fig.show()

# All Years in One Map

In [None]:
# Reshape wide -> long: skip the first column positionally, keep "Fips" as
# the identifier, and turn every remaining column label into a "Year" value
# with the cell content as "Party".
state_vs_party_long_dataframe = pd.melt(
    state_vs_party_dataframe.iloc[:, 1:],
    id_vars="Fips",
    var_name="Year",
    value_name="Party",
)

In [None]:
# Party -> color mapping for the multi-year charts (Republican red,
# Democratic blue).
party_color_map = dict([("Republican", "red"), ("Democratic", "blue")])

In [None]:
# Animated map: one frame per "Year" value, each state colored by the party
# recorded for that year.
fig = px.choropleth(
    data_frame=state_vs_party_long_dataframe,
    geojson=countries_geojson,
    locations="Fips",
    scope="usa",
    color="Party",
    color_discrete_map=party_color_map,
    animation_frame="Year",
    title="US Election Results Over the Years",
)

fig.show()

In [None]:
# Tally how many rows (states) each party has in each year; the result has
# the columns Year, Party and Count.
party_counts = (
    state_vs_party_long_dataframe
    .groupby(["Year", "Party"])
    .size()
    .rename("Count")
    .reset_index()
)

print(party_counts)


In [None]:
# Check which kind of object the counting step produced.
type(party_counts)

In [None]:
# Animated bar chart (one frame per year, not facets): for the selected
# year, one bar per party showing how many states it won.
# `px` comes from the notebook's import cell, so it is not re-imported here.
fig_counts = px.bar(
    party_counts,
    x="Party",
    y="Count",
    color="Party",
    color_discrete_map=party_color_map,
    animation_frame="Year",
    title="Number of States Won by Each Party Over the Years",
)
fig_counts.show()


In [None]:
# Line chart of states won per party across elections.
# `px` comes from the notebook's import cell, so it is not re-imported here.

# Recompute the per-year, per-party tallies so this cell does not depend on
# whichever shape `party_counts` was last given.
party_counts = (
    state_vs_party_long_dataframe
    .groupby(["Year", "Party"])
    .size()
    .reset_index(name="Count")
)

# The melted "Year" values are column labels (strings); cast them to int so
# the x-axis is numeric rather than categorical. The cast does not reorder
# rows — ordering comes from the groupby above.
party_counts["Year"] = party_counts["Year"].astype(int)

# One line (with markers) per party.
fig = px.line(
    party_counts,
    x="Year",
    y="Count",
    color="Party",
    markers=True,
    color_discrete_map=party_color_map,
    title="Number of States Won by Each Party Over the Years",
)

fig.show()


In [None]:
# Total number of state wins per party across every year in the long table.
# NOTE: this rebinds `party_counts` to a frame without a "Year" column.
party_counts = (
    state_vs_party_long_dataframe
    .groupby("Party")
    .size()
    .rename("Count")
    .reset_index()
)

party_counts

In [None]:
# Pie chart of the average number of states each party wins per election.
# `px` comes from the notebook's import cell, so it is not re-imported here.

# Step 1: Count number of states won by each party per year.
party_yearly_counts = (
    state_vs_party_long_dataframe
    .groupby(["Year", "Party"])
    .size()
    .reset_index(name="Count")
)

# Step 2: Average states won per year by each party.
# A party that won no state in a year has no row in `party_yearly_counts`,
# so taking `.mean()` of its rows would average only over the years it won
# something and overstate the result. Divide each party's total by the
# number of distinct years instead, so zero-win years count as zero.
party_avg = (
    party_yearly_counts
    .groupby("Party")["Count"]
    .sum()
    .div(party_yearly_counts["Year"].nunique())
    .reset_index(name="AverageCount")
)

# Step 3: Create the pie chart.
fig = px.pie(
    party_avg,
    names="Party",
    values="AverageCount",
    color="Party",
    color_discrete_map=party_color_map,
    title="Average Number of States Won Per Year by Each Party",
)

fig.show()


In [None]:
# Show the per-party averages that feed the pie chart.
party_avg