In [19]:
# Import required libraries
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

# Set your name for image naming convention
your_name = "JackSchenck"  # Replace with your actual name

# ------------------------------
# 1. Load U.S. States shapefile
# ------------------------------
# Optional country outline from the sample data that used to ship with
# GeoPandas. The `geopandas.datasets` helper was removed in GeoPandas 1.0 and
# raises AttributeError there; nothing below needs the outline, so fall back
# to None instead of aborting the whole script.
try:
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
except AttributeError:
    usa = None
else:
    usa = world[(world['continent'] == 'North America') & (world['name'] == 'United States of America')]

# Detailed state boundaries from the Census Bureau cartographic boundary files.
# This must be the *state* layer: the county-subdivision layer ("cousub") has
# subdivision names in NAME, so filtering and merging on state names would not
# select states at all.
# NOTE(review): download cb_2020_us_state_500k.zip from the Census Bureau and
# place it next to this script/notebook.
states = gpd.read_file("cb_2020_us_state_500k.zip")

# Drop everything outside the contiguous U.S. (non-contiguous states and
# territories). Any NAME not listed here is kept.
exclude_states = ['Alaska', 'Hawaii', 'Puerto Rico', 'Guam', 'American Samoa',
                  'Commonwealth of the Northern Mariana Islands', 'United States Virgin Islands']
states = states[~states['NAME'].isin(exclude_states)]

# ------------------------------
# 2. Load Your 2020 Dataset
# ------------------------------
# Placeholder: replace with your dataset file
# Example: state-level data with 'STATE_NAME' and 'Tornado_Count' columns
data = pd.read_csv("2020_geographic_data.csv")

# Report shapes that have no matching row in the dataset. A name mismatch
# (spelling, stray whitespace, abbreviations) is otherwise invisible.
states_without_data = sorted(
    set(states["NAME"].dropna().astype(str)) - set(data["STATE_NAME"].dropna().astype(str))
)
if states_without_data:
    preview = ", ".join(states_without_data[:10])
    print(f"{len(states_without_data)} shape(s) have no matching data row, e.g.: {preview}")

# Merge the geographic data with state shapes.
# how="left" keeps every shape even when the dataset has no row for it; the
# default inner join would silently drop those shapes from `merged`.
merged = states.merge(data, left_on="NAME", right_on="STATE_NAME", how="left")

# ------------------------------
# 3. Base Map of US
# ------------------------------
# Outline-only map: draws just the boundary lines of the filtered `states`
# layer, hides the axes frame, and saves the figure as a 300-dpi PNG.
fig, ax = plt.subplots(figsize=(15, 10))
states.boundary.plot(
    ax=ax,
    edgecolor='black',
    linewidth=1,
)
ax.set_title("Base Map: 48 Contiguous U.S. States", fontsize=16)
ax.axis('off')
fig.savefig(f"{your_name}_US_BaseMap.png", dpi=300)
plt.show()

# ------------------------------
# 4. Choropleth: Data Distribution Map
# ------------------------------
# Shade each shape in `merged` by its 'Tornado_Count' value and save the map.
fig, ax = plt.subplots(figsize=(15, 10))
merged.plot(
    column='Tornado_Count',
    cmap='OrRd',
    linewidth=0.8,
    ax=ax,
    edgecolor='0.8',
    legend=True,
    # Rows whose Tornado_Count is missing are skipped by default, leaving
    # holes in the map; draw them in neutral grey so "no data" is visible.
    missing_kwds={"color": "lightgrey", "edgecolor": "0.8"},
)
ax.set_title("Tornado Distribution by State (2020)", fontsize=16)
ax.axis('off')
plt.savefig(f"{your_name}_TornadoDistributionMap.png", dpi=300)
plt.show()

# ------------------------------
# 5. Statistical Visualizations
# ------------------------------

# Bar Plot: Total Tornadoes per State
# Horizontal bars, one per STATE_NAME, ordered from highest to lowest count.
plt.figure(figsize=(12, 6))
sorted_data = data.sort_values(by='Tornado_Count', ascending=False)
# Passing `palette` without `hue` is deprecated in seaborn 0.13. Assigning the
# categorical variable to `hue` gives the same one-colour-per-bar result, and
# dodge=False keeps each bar at full width.
bar_ax = sns.barplot(
    x='Tornado_Count',
    y='STATE_NAME',
    hue='STATE_NAME',
    data=sorted_data,
    palette="flare",
    dodge=False,
)
# The hue legend would only repeat the y-axis labels; remove it if one was drawn.
bar_legend = bar_ax.get_legend()
if bar_legend is not None:
    bar_legend.remove()
plt.title("Total Tornadoes per State (2020)")
plt.xlabel("Tornado Count")
plt.ylabel("State")
plt.tight_layout()
plt.savefig(f"{your_name}_TornadoesBarChart.png", dpi=300)
plt.show()

# Histogram: Tornado Count Distribution
# Ten-bin histogram of the 'Tornado_Count' column with a KDE curve overlaid.
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    data=data,
    x='Tornado_Count',
    bins=10,
    kde=True,
    color='skyblue',
    ax=hist_ax,
)
hist_ax.set_title("Distribution of Tornado Counts Across States")
hist_ax.set_xlabel("Tornado Count")
hist_ax.set_ylabel("Frequency")
hist_fig.tight_layout()
hist_fig.savefig(f"{your_name}_TornadoHistogram.png", dpi=300)
plt.show()

# Boxplot: Tornado Count
# Single horizontal box summarising the 'Tornado_Count' column.
box_fig, box_ax = plt.subplots(figsize=(6, 5))
sns.boxplot(data=data, x='Tornado_Count', color="lightcoral", ax=box_ax)
box_ax.set_title("Boxplot of Tornado Counts Across States")
box_ax.set_xlabel("Tornado Count")
box_fig.tight_layout()
box_fig.savefig(f"{your_name}_TornadoBoxplot.png", dpi=300)
plt.show()

# Line Plot: Example trend (e.g., Tornado count by region)
# Only drawn when the dataset provides a 'Region' column; counts are summed
# per region before plotting.
if 'Region' in data.columns:
    region_data = data.groupby('Region', as_index=False)['Tornado_Count'].sum()
    region_fig, region_ax = plt.subplots(figsize=(8, 5))
    sns.lineplot(
        data=region_data,
        x='Region',
        y='Tornado_Count',
        marker='o',
        ax=region_ax,
    )
    region_ax.set_title("Tornado Counts by Region")
    # Tilt the region labels so they do not overlap.
    region_ax.tick_params(axis='x', labelrotation=45)
    region_fig.tight_layout()
    region_fig.savefig(f"{your_name}_TornadoByRegion.png", dpi=300)
    plt.show()

# ------------------------------
# Notes:
# - Replace 'Tornado_Count' and 'STATE_NAME' with your dataset’s actual column names
# - Add Markdown or docstrings to explain decisions
# ------------------------------


ModuleNotFoundError: No module named 'geopandas'