In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas
%matplotlib inline

In [None]:
# Load the worksheet of worldwide CCS projects and key the rows by country
ccs_workbook_path = "./data/CCS Map Data Jan2023.xlsx"
df = pd.read_excel(ccs_workbook_path).set_index("Country Location")
df.info()



# List the column names available in the sheet
df.columns



# Create a new dataframe with only the columns needed for the maps and charts.
# The explicit .copy() makes locationStatus an independent frame, so the column
# assignments performed on it later neither touch df nor trigger pandas'
# SettingWithCopyWarning.
locationStatus = df[[
    "Overall Status",
    "Latitude",
    "Longitude"
]].copy()
locationStatus



# Normalise the status labels:
#   * missing values -> "Unidentified"
#   * "Completed"    -> "Active"
#   * "Hold"         -> "Potential"
#   * "Unidentified" -> "Potential"
# The cleaned column is attached with assign(), which returns a new frame.
# Writing through .loc[...] = ... instead would raise SettingWithCopyWarning,
# because locationStatus is derived from a column selection of df.
status_replacements = {"Completed": "Active", "Hold": "Potential", "Unidentified": "Potential"}
normalised_status = (
    locationStatus["Overall Status"]
    .fillna("Unidentified")
    .replace(status_replacements)
)
locationStatus = locationStatus.assign(**{"Overall Status": normalised_status})
locationStatus



# Geopandas has a map of the world to use as the background of a plot
# NOTE(review): geopandas.datasets was deprecated in GeoPandas 0.14 and removed
# in 1.0, so this call only works with geopandas < 1.0. Plan a move to a local
# copy of the Natural Earth low-resolution countries file.
WorldMap = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))

# Draw the world map in grey on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(16, 10))
WorldMap.plot(color="grey", ax=ax)

# Create variables for the latitude, longitude, status of each CCS
x = locationStatus['Longitude']
y = locationStatus['Latitude']
status = locationStatus["Overall Status"]

# Define custom colors for each status category
custom_colors = {
    'Active': 'green',
    'Potential': 'yellow',
    'Terminated': 'red'
}

# Fallback for any status that is not a key of custom_colors. A plain
# status.map(custom_colors) would yield NaN for such rows, and scatter() cannot
# interpret a color sequence that contains NaN.
other_color = 'white'

# Map the status to colors for each point
point_colors = status.map(lambda label: custom_colors.get(label, other_color))

# Plot every project on the same axes as the map, with color-coded points
scatter = ax.scatter(x, y, c=point_colors, alpha=0.8, edgecolors="b", s=45)

# Show the whole globe
ax.set_xlim([-180, 180])
ax.set_ylim([-90, 90])

# Add a legend for the status level; an "Other" entry is added only when some
# point actually uses the fallback color
legend_colors = dict(custom_colors)
if not status.isin(list(custom_colors)).all():
    legend_colors['Other'] = other_color
legend_labels = [
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=label)
    for label, color in legend_colors.items()
]
ax.legend(handles=legend_labels, title='Status Level', loc='center left')

# Add title and labels
ax.set_title("Carbon Capture and Storage Locations")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()



# Count the projects per status worldwide and draw the counts as a bar chart
totalStatus2 = locationStatus["Overall Status"].value_counts()
ax = totalStatus2.plot(kind="bar", figsize=(5,4.5))

# Write each bar's count just above the bar
for position, (_, count) in enumerate(totalStatus2.items()):
    ax.text(
        position,
        count + 0.1,
        f"{count}",
        ha='center',
        va='bottom',
        fontsize=9,
        color='black',
    )

# Title and axis labels
ax.set_title("CCS Project Status")
ax.set_xlabel("Status")
ax.set_ylabel("Project Count")

plt.show()


# Keep only the rows whose index label is "United States"
is_usa = locationStatus.index == "United States"
usaLocationStatus = locationStatus[is_usa]
# NOTE(review): the dropna() result below is not assigned, so usaLocationStatus
# itself still contains any rows with missing values. Confirm whether dropping
# them was intended.
usaLocationStatus.dropna()



# Use Geopandas to get a path to the world map
# NOTE(review): geopandas.datasets was deprecated in GeoPandas 0.14 and removed
# in 1.0, so this call only works with geopandas < 1.0. Plan a move to a local
# copy of the Natural Earth low-resolution countries file.
usaMap = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))

# Use a filter to get the USA
usaMap = usaMap[usaMap["name"] == "United States of America"]

# Draw the USA outline in grey on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 10))
usaMap.plot(color="grey", edgecolor="black", ax=ax)

# Create variables for the latitude, longitude, status of each CCS
x = usaLocationStatus['Longitude']
y = usaLocationStatus['Latitude']
status = usaLocationStatus["Overall Status"]

# Define custom colors for each status category
custom_colors = {
    'Active': 'green',
    'Potential': 'yellow',
    'Terminated': 'red'
}

# Fallback for any status that is not a key of custom_colors. A plain
# status.map(custom_colors) would yield NaN for such rows, and scatter() cannot
# interpret a color sequence that contains NaN.
other_color = 'white'

# Map the status to colors for each point
point_colors = status.map(lambda label: custom_colors.get(label, other_color))

# Plot every project on the same axes as the map, with color-coded points
scatter = ax.scatter(x, y, c=point_colors, alpha=0.8, edgecolors="b", s=45)

# Restrict the view to longitudes -140..-60 and latitudes 23..54; points
# outside this window are not visible on the figure
ax.set_xlim([-140, -60])
ax.set_ylim([23, 54])

# Add a legend for the status level; an "Other" entry is added only when some
# point actually uses the fallback color
legend_colors = dict(custom_colors)
if not status.isin(list(custom_colors)).all():
    legend_colors['Other'] = other_color
legend_labels = [
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=label)
    for label, color in legend_colors.items()
]
ax.legend(handles=legend_labels, title='Status Level', loc='lower left')

# Add title and labels
ax.set_title("Carbon Capture and Storage Locations (USA)")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()


# Count the USA projects per status and draw the counts as a bar chart
totalStatusUsa = usaLocationStatus["Overall Status"].value_counts()
ax = totalStatusUsa.plot(kind="bar", figsize=(5,4.5))

# Write each bar's count just above the bar
for position, (_, count) in enumerate(totalStatusUsa.items()):
    ax.text(
        position,
        count + 0.1,
        f"{count}",
        ha='center',
        va='bottom',
        fontsize=9,
        color='black',
    )

# Title and axis labels
ax.set_title("CCS Project Status (USA)")
ax.set_xlabel("Status")
ax.set_ylabel("Project Count")

plt.show()