# ⛑️ Exploratory Data Analysis

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](<https://colab.research.google.com/>)

# 1. ⚙️ Imports

Import the necessary libraries and packages.

In [None]:
import pandas as pd
import plotly.express as px

# Local Libraries
from data import data

# Data

In [None]:
# Ask the project's `data` helper for the location of the state-vs-party
# election dataset.
# NOTE(review): the meaning of the positional arguments (source, stage, name,
# version?) is assumed from their values — confirm against `get_dataset_path`.
state_vs_party_dataframe_path = data.get_dataset_path(
    "rbs",
    "processed",
    "us_presidential_elections_by_state_and_party",
    1,
)

In [None]:
# Load the dataset into a DataFrame; the plotting cells below all read from it.
state_vs_party_dataframe = pd.read_csv(
    filepath_or_buffer=state_vs_party_dataframe_path,
)

In [None]:
# Preview the first five rows after dropping row 0, the same slice the
# plotting cells work on.
state_vs_party_dataframe.iloc[1:].head(n=5)

# Plot

In [None]:
import json
from urllib.request import urlopen

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Interactive US map: one choropleth trace per election-year column, with a
# dropdown that shows exactly one trace at a time.

# State outlines whose features carry the state name in `properties.name`.
# The county-level FIPS file cannot be joined on state names, so the
# state-level file is loaded here.
with urlopen('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json') as response:
    states_geojson = json.load(response)

# Row 0 is skipped, as in the other cells of this notebook.
df = state_vs_party_dataframe[1:]

# The "Year" column is matched against state names; every remaining column
# is treated as one election year.
years = list(df.columns[1:])

# Numeric encoding for the colour scale: 0 -> red, 1 -> blue.
party_codes = {"Republican": 0, "Democratic": 1}

fig = go.Figure()

# A dedicated loop variable keeps this cell from overwriting the
# notebook-level `year` used by other cells.
for i, election_year in enumerate(years):
    fig.add_trace(go.Choropleth(
        geojson=states_geojson,
        locations=df["Year"],
        z=df[election_year].map(party_codes),
        # "geojson-id" matches `locations` against the custom GeoJSON via
        # `featureidkey`; "USA-states" would ignore the GeoJSON and expect
        # two-letter state codes instead of full names.
        locationmode="geojson-id",
        featureidkey="properties.name",
        colorscale=["red", "blue"],
        zmin=0,
        zmax=1,
        colorbar_title="Party",
        visible=(i == 0),  # only the first year is shown initially
        name=str(election_year),
    ))

# Dropdown menu to toggle years: each button shows its own trace and hides
# all others, and updates the title to match.
dropdown_buttons = [
    {
        "label": str(election_year),
        "method": "update",
        "args": [
            {"visible": [i == j for j in range(len(years))]},
            {"title": f"US States Party Affiliation - {election_year}"},
        ],
    }
    for i, election_year in enumerate(years)
]

# The initial title follows the trace that is actually visible instead of a
# hard-coded year.
initial_title = (
    f"US States Party Affiliation - {years[0]}"
    if years
    else "US States Party Affiliation"
)

fig.update_layout(
    title=initial_title,
    geo_scope="usa",
    updatemenus=[
        {
            "buttons": dropdown_buttons,
            "direction": "down",
            "showactive": True,
            "x": 0.1,
            "xanchor": "left",
            "y": 1.1,
            "yanchor": "top",
        }
    ],
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
)

fig.show()


In [None]:
import json
from urllib.request import urlopen

import pandas as pd
import plotly.express as px

# Reference example: county-level unemployment drawn on a tile map.

# County outlines; parsed into `counties`, which is the GeoJSON passed below.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as resp:
    counties = json.loads(resp.read())

# Read `fips` as text so the codes stay strings rather than becoming numbers.
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
    dtype={"fips": str},
)

# Collect the map settings first, then build the figure in one call.
map_options = dict(
    geojson=counties,
    locations='fips',
    color='unemp',
    color_continuous_scale="Viridis",
    range_color=(0, 12),
    map_style="carto-positron",
    zoom=1.8,
    center={"lat": 51.0902, "lon": -120.7129},
    opacity=0.5,
    labels={'unemp': 'unemployment rate'},
)
fig = px.choropleth_map(df, **map_options)

# Remove the outer margins so the map fills the output area.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Static map of the party recorded for each state in one election year.
# `year` is also read by later cells, so it stays a notebook-level name.
year = "2020"

df = state_vs_party_dataframe[1:].copy()

# Take an explicit copy of the two needed columns so that adding `color`
# below writes to an independent frame (no SettingWithCopyWarning).
# NOTE: the `df_2024` name is kept because another cell reads it; it holds
# the data for `year`, whatever that is set to.
df_2024 = df[['Year', year]].copy()

party_color_map = {'Republican': 'red', 'Democratic': 'blue'}
df_2024['color'] = df_2024[year].map(party_color_map)

# Load US states GeoJSON from a public GitHub repo
us_states = gpd.read_file('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json')

# Left-merge on state name so every shape is kept; the "Year" column is the
# one matched against the GeoJSON `name` field.
merged = us_states.merge(df_2024, left_on='name', right_on='Year', how='left')

# Shapes without a mapped party colour are drawn in gray.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
merged.plot(color=merged['color'].fillna('gray'), ax=ax, edgecolor='black')

# The title follows the selected year instead of a hard-coded one.
ax.set_title(f"US States colored by {year} Party (Red=Republican, Blue=Democratic)")
ax.axis('off')

# `plt.show` takes no renderer name; the stray "png" argument is dropped.
plt.show()


In [None]:
# Tally how often each value occurs in the column for the selected `year`.
# NOTE(review): this reads the full frame, including row 0 that the plotting
# cells skip via `[1:]` — confirm that is intended.
state_vs_party_dataframe[year].value_counts()

In [None]:
# Echo the value currently bound to `year` in the notebook session.
year

In [None]:
# import pandas as pd
# import plotly.express as px
# import json
# from urllib.request import urlopen
#
# def plot_us_party_map_plotly(df, year):
#     # Prepare the data
#     df = df.copy()
#     print(df.head())
#     df.columns = df.iloc[0]  # First row as header
#     df = df[1:].reset_index(drop=True)
#     df.rename(columns={df.columns[0]: "State"}, inplace=True)
#
#     # if year not in df.columns:
#     #     raise ValueError(f"Year '{year}' not found in dataframe.")
#
#     df_year = df[["State", year]].copy()
#     df_year['party_code'] = df_year[year].map({'Republican': 0, 'Democratic': 1})
#     df_year['color'] = df_year['party_code'].map({0: 'red', 1: 'blue'})
#
#     # Load US states GeoJSON
#     geojson_url = 'https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json'
#     with urlopen(geojson_url) as response:
#         us_states = json.load(response)
#
#     # Plot with Plotly Express
#     fig = px.choropleth(
#         df_year,
#         geojson=us_states,
#         locations="State",
#         locationmode="USA-states",
#         featureidkey="properties.name",
#         color="party_code",
#         color_continuous_scale=["red", "blue"],
#         scope="usa",
#         title=f"US States Party Affiliation - {year}",
#         labels={'party_code': 'Party'},
#         range_color=(0, 1)
#     )
#
#     fig.update_layout(margin={"r":0, "t":50, "l":0, "b":0})
#     fig.show()
#
# # Example usage
# plot_us_party_map_plotly(state_vs_party_dataframe, "2024")


In [None]:
import json
from urllib.request import urlopen

import plotly.express as px

# Tile-map choropleth of the party per state for the notebook-level `year`
# (set in an earlier cell).

# US state outlines; each feature carries the state name in `properties.name`.
# NOTE: despite its name this variable holds state shapes, not counties. The
# name is kept because other cells reference it.
with urlopen('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json') as response:
    counties_geojson = json.load(response)

# Independent copy of the two needed columns; row 0 is skipped as elsewhere.
df = state_vs_party_dataframe[1:][['Year', year]].copy()

party_color_map = {'Republican': 'red', 'Democratic': 'blue'}
# Derive the colour from this cell's own frame rather than from a frame
# built in another cell.
df['color'] = df[year].map(party_color_map)

fig = px.choropleth_map(
    df,
    # Use the GeoJSON loaded above, not the county file left over from
    # another cell.
    geojson=counties_geojson,
    # Match the state names in "Year" against `properties.name`; without
    # this they would be compared with the feature ids.
    featureidkey="properties.name",
    locations='Year',
    # Party is categorical, so colour by the party column with a discrete
    # map instead of a continuous colour scale.
    color=year,
    color_discrete_map=party_color_map,
    map_style="carto-positron",
    zoom=1.8,
    center={"lat": 51.0902, "lon": -120.7129},
    opacity=0.5,
    labels={year: 'Party'},
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()


In [None]:
from urllib.request import urlopen
import json
import pandas as pd
import plotly.express as px

# Load county GeoJSON
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

# Dummy example of the expected frame layout: one row per county with its
# FIPS code (as a string), party and year. Replace with real data.
# The dict is deliberately NOT called `data`: that name is the project module
# imported in the first cell, and overwriting it would break re-running
# `data.get_dataset_path(...)`.
sample_counties = {
    'fips': ['01001', '01003', '01005'],  # County FIPS codes
    'party': ['Republican', 'Democratic', 'Republican'],
    'year': ['2024', '2024', '2024']
}
df = pd.DataFrame(sample_counties)

# Map party to colors
party_color_map = {'Republican': 'red', 'Democratic': 'blue'}
df['color'] = df['party'].map(party_color_map)

# Colour by the categorical `party` column; `color_discrete_map` pins each
# party to its colour.
fig = px.choropleth(df,
                    geojson=counties,
                    locations='fips',
                    color='party',
                    color_discrete_map=party_color_map,
                    scope="usa",
                    labels={'party': 'Party'}
                   )

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


In [None]:
# Download the county GeoJSON again and display the parsed object so its
# structure can be inspected.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as resp:
    counties = json.loads(resp.read())

counties

In [None]:
import json
import random
from urllib.request import urlopen

import pandas as pd
import plotly.express as px

# Demo map: assign a random party to every feature of the GeoJSON and plot it.

# Note the file fetched here is the US *states* GeoJSON, even though the
# variable and column names below speak of counties / FIPS codes.
with urlopen('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json') as resp:
    counties_geojson = json.loads(resp.read())

# Collect the `id` of every feature, in file order.
all_fips = []
for feature in counties_geojson["features"]:
    all_fips.append(feature["id"])

# Fixed seed so the random assignment is the same on every run.
random.seed(42)
party_choices = ['Republican', 'Democratic']
party_data = []
for _ in all_fips:
    party_data.append(random.choice(party_choices))

# One row per feature id with its randomly drawn party.
df = pd.DataFrame({'fips': all_fips, 'party': party_data})

party_color_map = {'Republican': 'red', 'Democratic': 'blue'}

# Colour each feature by its party, using the fixed party -> colour mapping.
choropleth_options = dict(
    geojson=counties_geojson,
    locations='fips',
    color='party',
    color_discrete_map=party_color_map,
    scope="usa",
    labels={'party': 'Party Affiliation'},
)
fig = px.choropleth(df, **choropleth_options)

fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()


In [None]:
# Rebuild the list of feature ids from the GeoJSON loaded in a previous cell.
all_fips = []
for feature in counties_geojson["features"]:
    all_fips.append(feature["id"])

# Re-seed so the same random party sequence is produced again, then draw one
# party per feature id.
random.seed(42)
party_choices = ['Republican', 'Democratic']
party_data = []
for _ in all_fips:
    party_data.append(random.choice(party_choices))

party_data

In [None]:
# Number of feature ids collected into `all_fips`.
len(all_fips)

In [None]:
import json
import random
from urllib.request import urlopen

import pandas as pd
import plotly.graph_objects as go

# Demo of the year dropdown using synthetic data: every state gets a random
# party for each of three years, with one choropleth trace per year.

# US state outlines; each feature carries the state name in `properties.name`.
with urlopen('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json') as response:
    states_geojson = json.load(response)

# Extract state names from GeoJSON
states = [feature['properties']['name'] for feature in states_geojson['features']]
years = [2016, 2020, 2024]

party_map = ['Republican', 'Democratic']
party_color_map = {'Republican': 'red', 'Democratic': 'blue'}

# Fixed seed for a consistent demo. The rows are built in year-major,
# state-minor order. The list is deliberately NOT called `data` (that name is
# the project module imported in the first cell), and the loop variable is
# not called `year` (a notebook-level string other cells index columns with).
random.seed(42)
demo_rows = [
    {'year': demo_year, 'state': state, 'party': random.choice(party_map)}
    for demo_year in years
    for state in states
]

df = pd.DataFrame(demo_rows)

# Build Plotly figure with one choropleth per year
fig = go.Figure()

for i, demo_year in enumerate(years):
    df_year = df[df['year'] == demo_year]

    # Numeric encoding for the colour scale (Republican=1, Democratic=0)
    z_vals = [1 if p == 'Republican' else 0 for p in df_year['party']]
    hover_texts = df_year['state'] + ' - ' + df_year['party']

    fig.add_trace(go.Choropleth(
        geojson=states_geojson,
        locations=df_year['state'],
        z=z_vals,
        text=hover_texts,
        colorscale=[[0, 'blue'], [1, 'red']],
        zmin=0,
        zmax=1,
        marker_line_color='white',
        colorbar_title="Party",
        featureidkey="properties.name",
        # "geojson-id" matches the full state names against the custom
        # GeoJSON; "USA-states" would expect two-letter state codes.
        locationmode='geojson-id',
        visible=(i == 0)  # Only show the first year initially
    ))

# One dropdown button per year: show that year's trace, hide the others and
# update the title.
buttons = []
for i, demo_year in enumerate(years):
    visibility = [False] * len(years)
    visibility[i] = True
    buttons.append(dict(
        label=str(demo_year),
        method="update",
        args=[
            {"visible": visibility},
            {"title": f"U.S. Party Control by State - {demo_year}"}
        ]
    ))

# Add layout and dropdown; the initial title follows the first (visible) year
# rather than a hard-coded one.
fig.update_layout(
    updatemenus=[{
        "buttons": buttons,
        "direction": "down",
        "showactive": True,
        "x": 0.1,
        "xanchor": "left",
        "y": 1.1,
        "yanchor": "top"
    }],
    title=f"U.S. Party Control by State - {years[0]}",
    geo=dict(scope="usa"),
    margin={"r": 0, "t": 30, "l": 0, "b": 0}
)

fig.show()
