<a href="https://colab.research.google.com/github/Nastiiasaenko/Data-Commons-/blob/main/Second_Part_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Part 2: Visualizing the emissions dataset.**

In this part of the tutorial, we show how to use the Data Commons API to build a dataset of US states and their emissions data. We use the graph structure of the Data Commons database to link each state (node) to the others through its "nearby places" (`nearbyPlaces`) property.

Imports:

In [None]:
#!pip install datacommons
import datacommons
import pandas as pd

Collecting datacommons
  Downloading datacommons-1.4.3-py3-none-any.whl.metadata (4.6 kB)
Downloading datacommons-1.4.3-py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 kB[0m [31m722.5 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: datacommons
Successfully installed datacommons-1.4.3


* We will start by constructing a dataset for all US states, which will be cross-referenced via the `nearbyPlaces` property.

In [None]:
# Getting US-states dcid (geo-id in case of states).
# The lookup is made with a one-element list of parent dcids and the result is
# indexed by that same parent dcid to obtain the list of state dcids.
states = datacommons.get_places_in(
    ['country/USA'],
    place_type='State',
)
states_list = states['country/USA']

In [None]:
## Statistical variables requested for every state:
## the population count first, followed by the four annual-emissions series.
variables_list = (
    ['Count_Person']
    + [
        'Annual_Emissions_CarbonDioxide_Biogenic',
        'Annual_Emissions_NitrousOxide_NonBiogenic',
        'Annual_Emissions_Methane_NonBiogenic',
        'Annual_Emissions_CarbonDioxide_NonBiogenic',
    ]
)

In [None]:
# For each state we are going to
# 1. Get statistical variables
# 2. Get property values for geo-coordinates and nearby places

## Static (non time-series) properties, one lookup per property for all states.
## The lookups run in the order name, nearbyPlaces, latitude, longitude.
names, nearby_places, lat, lon = (
    datacommons.get_property_values(states_list, prop)
    for prop in ('name', 'nearbyPlaces', 'latitude', 'longitude')
)

In [None]:
## Collect, in long format, the time series of every chosen variable for every state.
## Each record is one (State, Year, Variable, Value) observation.
data = []

for state in states_list:
    for var in variables_list:
        try:
            series = datacommons.get_stat_series(state, var)
            data.extend(
                {'State': state, 'Year': year, 'Variable': var, 'Value': value}
                for year, value in series.items()
            )
        except Exception as err:
            # Best effort: report the failing (state, variable) pair and keep going.
            print(f"Error fetching data for {state} and {var}: {err}")


Constructing a final dataset:

In [None]:
# Long-format table: one row per record appended to `data` by the fetch loop
# (columns State, Year, Variable, Value).
df = pd.DataFrame(data)

In [None]:
 # Reshape long -> wide: one row per (State, Year), one column per Variable.
 df_pivot = (
     df
     .pivot_table(index=['State', 'Year'], columns='Variable', values='Value')
     .reset_index()
 )

In [None]:
## adding in static information
def _first_value(values):
    """Return the first entry of a list/tuple of property values.

    Non-sequence inputs (e.g. NaN for a state missing from the lookup) are
    returned unchanged; an empty sequence yields None.
    """
    if isinstance(values, (list, tuple)):
        return values[0] if values else None
    return values


# The Data Commons property lookup returns a *list* of values per dcid, so
# mapping it directly would leave one-element lists in the Name/Lat/Lon cells.
# Unwrap those to scalars; Nearby_Places is genuinely multi-valued and stays a list.
df_pivot['Name'] = df_pivot['State'].map(names).map(_first_value)
df_pivot['Nearby_Places'] = df_pivot['State'].map(nearby_places)
# Coordinates are converted to numbers; anything unparsable becomes NaN.
df_pivot['Lat'] = pd.to_numeric(df_pivot['State'].map(lat).map(_first_value), errors='coerce')
df_pivot['Lon'] = pd.to_numeric(df_pivot['State'].map(lon).map(_first_value), errors='coerce')

In [None]:
## heating and cooling degree day data set
def degree_day_df(dcid):
    """Build a per-period heating/cooling degree table for one place.

    Requests the "Mean_Temperature" series for ``dcid`` and, for every
    (date key, mean temperature) entry, derives
    ``HDD = max(base - mean, 0)`` and ``CDD = max(mean - base, 0)`` against a
    fixed base temperature of 18.3. Date keys are parsed with the '%Y-%m'
    format, so they are expected to look like 'YYYY-MM'.

    Returns a DataFrame with the columns year, month_name, month_number, HDD,
    CDD and State (set to ``dcid``), sorted by year and then month number.
    """
    base_temperature = 18.3  # presumably degrees Celsius — TODO confirm the unit of Mean_Temperature
    monthly_means = datacommons.get_stat_series(dcid, "Mean_Temperature")
    periods = list(monthly_means.keys())
    temperatures = list(monthly_means.values())

    table = pd.DataFrame({
        'year': [int(period[:4]) for period in periods],
        'month_name': [
            pd.to_datetime(period, format='%Y-%m').strftime('%B')
            for period in periods
        ],
        'month_number': [int(period[5:]) for period in periods],
        # Only the shortfall below the base counts as heating demand ...
        'HDD': [max(base_temperature - temp, 0) for temp in temperatures],
        # ... and only the excess above it counts as cooling demand.
        'CDD': [max(temp - base_temperature, 0) for temp in temperatures],
    })
    table['State'] = dcid
    return table.sort_values(by=['year', 'month_number'])

# Build one degree-day frame per state and concatenate them in a single call;
# growing a DataFrame with pd.concat inside the loop re-copies all previous
# rows on every iteration.
state_frames = [degree_day_df(s) for s in states_list]
if state_frames:
    HCC_CDD_df = pd.concat(state_frames)
else:
    # No states to process: keep the expected columns so the lookup below still works.
    HCC_CDD_df = pd.DataFrame(
        columns=['year', 'month_name', 'month_number', 'HDD', 'CDD', 'State']
    )

# The name lookup holds a list of values per dcid; keep only the first one so
# the Name column contains plain strings rather than one-element lists.
state_names = {
    dcid: (values[0] if isinstance(values, (list, tuple)) else values)
    for dcid, values in names.items()
    if not isinstance(values, (list, tuple)) or values
}
HCC_CDD_df['Name'] = HCC_CDD_df['State'].map(state_names)


In [None]:
# vehicle dataset: household counts by number of available vehicles,
# overall and broken down by household size.
# NOTE(review): the 1-person group lists only the 2, 3 and 4+ vehicle variables;
# the 0- and 1-vehicle variants present for the other household sizes are
# absent — confirm whether that is intentional.
vehicles_variable_list = [
    # all households
    'Count_Household_With0AvailableVehicles',
    'Count_Household_With1AvailableVehicles',
    'Count_Household_With2AvailableVehicles',
    'Count_Household_With3AvailableVehicles',
    'Count_Household_With4OrMoreAvailableVehicles',
    # households with 4 or more persons
    'Count_Household_With4OrMorePerson_With0AvailableVehicles',
    'Count_Household_With4OrMorePerson_With1AvailableVehicles',
    'Count_Household_With4OrMorePerson_With2AvailableVehicles',
    'Count_Household_With4OrMorePerson_With3AvailableVehicles',
    'Count_Household_With4OrMorePerson_With4OrMoreAvailableVehicles',
    # 1-person households
    'Count_Household_With1Person_With2AvailableVehicles',
    'Count_Household_With1Person_With3AvailableVehicles',
    'Count_Household_With1Person_With4OrMoreAvailableVehicles',
    # 2-person households
    'Count_Household_With2Person_With0AvailableVehicles',
    'Count_Household_With2Person_With1AvailableVehicles',
    'Count_Household_With2Person_With2AvailableVehicles',
    'Count_Household_With2Person_With3AvailableVehicles',
    'Count_Household_With2Person_With4OrMoreAvailableVehicles',
    # 3-person households
    'Count_Household_With3Person_With0AvailableVehicles',
    'Count_Household_With3Person_With1AvailableVehicles',
    'Count_Household_With3Person_With2AvailableVehicles',
    'Count_Household_With3Person_With3AvailableVehicles',
    'Count_Household_With3Person_With4OrMoreAvailableVehicles',
]


# get time series of vehicles (long format, one record per observation)
vehicles_data = []

for state in states_list:
    for var in vehicles_variable_list:
        try:
            series = datacommons.get_stat_series(state, var)
            vehicles_data.extend(
                {'State': state, 'Year': year, 'Variable': var, 'Value': value}
                for year, value in series.items()
            )
        except Exception as err:
            # Best effort: report the failing (state, variable) pair and keep going.
            print(f"Error fetching data for {state} and {var}: {err}")


df_vehicles = pd.DataFrame(vehicles_data)


In [None]:
 # Reshape long -> wide: one row per (State, Year), one column per vehicle variable.
 df_vehicles_pivot = (
     df_vehicles
     .pivot_table(index=['State', 'Year'], columns='Variable', values='Value')
     .reset_index()
 )

In [None]:
# Cast Year to int so it can be compared numerically (e.g. the Year >= 2016
# filter sketched in the plotting cell). Presumably the values arrive as
# strings taken from the stat-series keys — TODO confirm.
df_pivot['Year'] = df_pivot['Year'].astype(int)

In [None]:
# Display the column index to check the pivoted variables and the static columns added to df_pivot.
df_pivot.columns

Index(['State', 'Year', 'Annual_Emissions_CarbonDioxide_Biogenic',
       'Annual_Emissions_CarbonDioxide_NonBiogenic',
       'Annual_Emissions_Methane_NonBiogenic',
       'Annual_Emissions_NitrousOxide_NonBiogenic', 'Count_Person', 'Name',
       'Nearby_Places', 'Lat', 'Lon'],
      dtype='object', name='Variable')

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Sample DataFrame: a small stand-in that mimics a subset of df_pivot's columns.
data = {'State': ['geoId/01', 'geoId/02', 'geoId/03', 'geoId/04', 'geoId/05', 'geoId/06']}
data['Year'] = [2020] * 6
data['Annual_Emissions_CarbonDioxide_Biogenic'] = [100, 200, 150, 300, 250, 400]
data['Annual_Emissions_CarbonDioxide_NonBiogenic'] = [90, 180, 140, 290, 230, 380]
data['Count_Person'] = [5000000, 700000, 1000000, 400000, 3000000, 2500000]
# Every sample state lists all the other sample states as its nearby places.
data['Nearby_Places'] = [
    [other for other in data['State'] if other != state_id]
    for state_id in data['State']
]
data['Name'] = ['[alabama]', '[alaska]', '[arizona]', '[arkansas]', '[california]', '[colorado]']
df = pd.DataFrame(data)
# Alternative kept for reference: use the real dataset instead of the sample.
# df = df_pivot[df_pivot['Year'] >= 2016]

# df['Nearby_Places'] = df['Nearby_Places'].apply(lambda k: [x.split('@')[0] for x in k])

# Function to create the interactive plot
def create_plotly_visualization(df):
    """Build a grouped bar chart comparing each state with its nearby states.

    For every (state, year) pair that has a row in ``df``, three bar traces
    are added (biogenic CO2, non-biogenic CO2 and population). Each trace
    covers the state itself plus the same-year rows of the states listed in
    its ``Nearby_Places`` cell. Only the traces of the first state/year are
    visible initially.

    Two dropdown menus toggle trace visibility, one by state and one by year.
    The menus do not combine: a state button shows every trace of that state
    (all years) and a year button shows every trace of that year (all states).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``State``, ``Year``, ``Name``,
        ``Nearby_Places``, ``Annual_Emissions_CarbonDioxide_Biogenic``,
        ``Annual_Emissions_CarbonDioxide_NonBiogenic`` and ``Count_Person``.
        A ``Nearby_Places`` cell that is not list-like (e.g. NaN) is treated
        as "no nearby states".

    Returns
    -------
    plotly.graph_objects.Figure
        The configured figure; an empty, labelled figure when ``df`` is empty.
    """
    fig = go.Figure()

    if df.empty:
        # Nothing to plot: return a labelled empty figure instead of failing
        # on the first-state / first-year lookup below.
        fig.update_layout(xaxis_title="State", yaxis_title="Value", barmode='group')
        return fig

    # .tolist() yields plain Python scalars for labels, comparisons and masks.
    states = df['State'].unique().tolist()
    years = df['Year'].unique().tolist()
    initial_state, initial_year = states[0], years[0]

    # (column to plot, suffix used in the trace name)
    metrics = (
        ('Annual_Emissions_CarbonDioxide_Biogenic', 'Biogenic'),
        ('Annual_Emissions_CarbonDioxide_NonBiogenic', 'Non-Biogenic'),
        ('Count_Person', 'Population'),
    )

    # Traces must exist BEFORE the dropdown buttons are built, because each
    # button carries one visibility flag per trace. trace_keys records the
    # (state, year) each trace belongs to, so the masks use exact matches
    # rather than substring checks on trace names.
    trace_keys = []
    for state in states:
        for year in years:
            same_year = df['Year'] == year
            filtered_df = df[(df['State'] == state) & same_year]
            if filtered_df.empty:
                continue

            nearby_places = filtered_df['Nearby_Places'].values[0]
            if not pd.api.types.is_list_like(nearby_places):
                nearby_places = []
            nearby_df = df[df['State'].isin(nearby_places) & same_year]
            combined_df = pd.concat([filtered_df, nearby_df])

            for column, label in metrics:
                fig.add_trace(
                    go.Bar(
                        x=combined_df['Name'],
                        y=combined_df[column],
                        name=f"{state} {year} {label}",
                        visible=(state == initial_state and year == initial_year)
                    )
                )
                trace_keys.append((state, year))

    # For method 'update', args is [trace-attribute updates, layout updates].
    state_buttons = [
        {
            'method': 'update',
            'label': state,
            'args': [
                {'visible': [key_state == state for key_state, _ in trace_keys]},
                {'title.text': f"State: {state}"}
            ]
        } for state in states
    ]
    year_buttons = [
        {
            'method': 'update',
            'label': str(year),
            'args': [
                {'visible': [key_year == year for _, key_year in trace_keys]},
                {'title.text': f"Year: {year}"}
            ]
        } for year in years
    ]

    fig.update_layout(
        updatemenus=[
            {
                'buttons': state_buttons,
                'direction': 'down',
                'showactive': True,
                'x': 0.1,
                'xanchor': 'left',
                'y': 1.1,
                'yanchor': 'top'
            },
            {
                'buttons': year_buttons,
                'direction': 'down',
                'showactive': True,
                'x': 0.3,
                'xanchor': 'left',
                'y': 1.1,
                'yanchor': 'top'
            }
        ],
        title=f"State: {initial_state}, Year: {initial_year}",
        xaxis_title="State",
        yaxis_title="Value",
        barmode='group'
    )

    return fig

# Create the plot from the sample frame `df` built earlier in this cell
fig = create_plotly_visualization(df)

# Show the plot (renders the interactive figure in the cell output)
fig.show()


In [None]:
import pandas as pd
import plotly.graph_objects as go

# Sample DataFrame: six sample states, one of them (geoId/05) observed in 2021
# instead of 2020, and one (geoId/02) with a shorter nearby-places list.
data = {'State': ['geoId/01', 'geoId/02', 'geoId/03', 'geoId/04', 'geoId/05', 'geoId/06']}
data['Year'] = [2020, 2020, 2020, 2020, 2021, 2020]
data['Annual_Emissions_CarbonDioxide_Biogenic'] = [100, 200, 150, 300, 250, 400]
data['Annual_Emissions_CarbonDioxide_NonBiogenic'] = [90, 180, 140, 290, 230, 380]
data['Count_Person'] = [5000000, 700000, 1000000, 400000, 3000000, 2500000]
# Default: every state lists all the others ...
data['Nearby_Places'] = [
    [other for other in data['State'] if other != state_id]
    for state_id in data['State']
]
# ... except geoId/02, which only has three neighbours.
data['Nearby_Places'][1] = ['geoId/01', 'geoId/03', 'geoId/04']
data['Name'] = ['[alabama]', '[alaska]', '[arizona]', '[arkansas]', '[california]', '[colorado]']
data_plot = pd.DataFrame(data)

# Function to create the interactive plot
def create_plotly_visualization(df):
    """Build a grouped bar chart of CO2 emissions for each state and its neighbours.

    For every (state, year) pair that has a row in ``df``, two bar traces are
    added (non-biogenic and biogenic CO2). Each trace covers the state itself
    plus the same-year rows of the states listed in its ``Nearby_Places``
    cell. Only the traces of the first state/year are visible initially.

    Two dropdown menus toggle trace visibility, one by state and one by year.
    The menus do not combine: a state button shows every trace of that state
    (all years) and a year button shows every trace of that year (all states).

    NOTE(review): the trace names say "per capita", but the plotted values are
    the raw emission columns (no division by Count_Person) — confirm intent.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``State``, ``Year``, ``Name``,
        ``Nearby_Places``, ``Annual_Emissions_CarbonDioxide_Biogenic`` and
        ``Annual_Emissions_CarbonDioxide_NonBiogenic``. A ``Nearby_Places``
        cell that is not list-like (e.g. NaN) is treated as "no nearby states".

    Returns
    -------
    plotly.graph_objects.Figure
        The configured figure; an empty, labelled figure when ``df`` is empty.
    """
    fig = go.Figure()

    if df.empty:
        # Nothing to plot: return a labelled empty figure instead of failing
        # on the first-state / first-year lookup below.
        fig.update_layout(xaxis_title="State", yaxis_title="Value", barmode='group')
        return fig

    # .tolist() yields plain Python scalars for labels, comparisons and masks.
    states = df['State'].unique().tolist()
    years = df['Year'].unique().tolist()

    # (column to plot, suffix used in the trace name)
    metrics = (
        ('Annual_Emissions_CarbonDioxide_NonBiogenic', 'CO2 NB Biogenic per capita'),
        ('Annual_Emissions_CarbonDioxide_Biogenic', 'Biogenic CO2 per capita'),
    )

    # Add the traces first: each dropdown button carries one visibility flag
    # per trace, so the buttons can only be built once all traces exist.
    # trace_keys records the (state, year) every trace belongs to.
    trace_keys = []
    for state in states:
        for year in years:
            same_year = df['Year'] == year
            filtered_df = df[(df['State'] == state) & same_year]
            if filtered_df.empty:
                continue

            nearby_places = filtered_df['Nearby_Places'].values[0]
            if not pd.api.types.is_list_like(nearby_places):
                nearby_places = []
            nearby_df = df[df['State'].isin(nearby_places) & same_year]
            combined_df = pd.concat([filtered_df, nearby_df])

            for column, label in metrics:
                fig.add_trace(
                    go.Bar(
                        x=combined_df['Name'],
                        y=combined_df[column],
                        name=f"{state} {year} {label}",
                        visible=(state == states[0] and year == years[0])
                    )
                )
                trace_keys.append((state, year))

    # For method 'update', args is [trace-attribute updates, layout updates];
    # the first element must be the dict itself, not a list wrapping it.
    fig.update_layout(
        updatemenus=[
            {
                'buttons': [
                    {
                        'method': 'update',
                        'label': state,
                        'args': [
                            {'visible': [key_state == state for key_state, _ in trace_keys]},
                            {'title.text': f"State: {state}"}
                        ]
                    } for state in states
                ],
                'direction': 'down',
                'showactive': True,
                'x': 0.1,
                'xanchor': 'left',
                'y': 1.1,
                'yanchor': 'top'
            },
            {
                'buttons': [
                    {
                        'method': 'update',
                        'label': str(year),
                        'args': [
                            {'visible': [key_year == year for _, key_year in trace_keys]},
                            {'title.text': f"Year: {year}"}
                        ]
                    } for year in years
                ],
                'direction': 'down',
                'showactive': True,
                'x': 0.3,
                'xanchor': 'left',
                'y': 1.1,
                'yanchor': 'top'
            }
        ],
        title=f"State: {states[0]}, Year: {years[0]}",
        xaxis_title="State",
        yaxis_title="Value",
        barmode='group'
    )

    return fig

# Create the plot from the sample frame `data_plot` built earlier in this cell
fig = create_plotly_visualization(data_plot)

# Show the plot (renders the interactive figure in the cell output)
fig.show()


In [None]:
import pandas as pd
import plotly.graph_objects as go

# Sample DataFrame: six sample states, one of them (geoId/05) observed in 2021
# instead of 2020, and one (geoId/02) with a shorter nearby-places list.
data = {'State': ['geoId/01', 'geoId/02', 'geoId/03', 'geoId/04', 'geoId/05', 'geoId/06']}
data['Year'] = [2020, 2020, 2020, 2020, 2021, 2020]
data['Annual_Emissions_CarbonDioxide_Biogenic'] = [100, 200, 150, 300, 250, 400]
data['Annual_Emissions_CarbonDioxide_NonBiogenic'] = [90, 180, 140, 290, 230, 380]
data['Count_Person'] = [5000000, 700000, 1000000, 400000, 3000000, 2500000]
# Default: every state lists all the others ...
data['Nearby_Places'] = [
    [other for other in data['State'] if other != state_id]
    for state_id in data['State']
]
# ... except geoId/02, which only has three neighbours.
data['Nearby_Places'][1] = ['geoId/01', 'geoId/03', 'geoId/04']
data['Name'] = ['[alabama]', '[alaska]', '[arizona]', '[arkansas]', '[california]', '[colorado]']
data_plot = pd.DataFrame(data)

# Function to create the interactive plot
def create_plotly_visualization(df):
    """Build a grouped bar chart of CO2 emissions for each state and its neighbours.

    For every (state, year) pair that has a row in ``df``, two bar traces are
    added (non-biogenic and biogenic CO2). Each trace covers the state itself
    plus the same-year rows of the states listed in its ``Nearby_Places``
    cell. Only the traces of the first state/year are visible initially.

    Two dropdown menus toggle trace visibility, one by state and one by year.
    The menus do not combine: a state button shows every trace of that state
    (all years) and a year button shows every trace of that year (all
    states). The button titles therefore name only the selected state or year.

    NOTE(review): the trace names say "per capita", but the plotted values are
    the raw emission columns (no division by Count_Person) — confirm intent.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``State``, ``Year``, ``Name``,
        ``Nearby_Places``, ``Annual_Emissions_CarbonDioxide_Biogenic`` and
        ``Annual_Emissions_CarbonDioxide_NonBiogenic``. A ``Nearby_Places``
        cell that is not list-like (e.g. NaN) is treated as "no nearby states".

    Returns
    -------
    plotly.graph_objects.Figure
        The configured figure; an empty, labelled figure when ``df`` is empty.
    """
    # Initialize figure
    fig = go.Figure()

    if df.empty:
        # Nothing to plot: return a labelled empty figure instead of failing
        # on the first-state / first-year lookup below.
        fig.update_layout(xaxis_title="State", yaxis_title="Value", barmode='group')
        return fig

    # .tolist() yields plain Python scalars for labels, comparisons and masks.
    states = df['State'].unique().tolist()
    years = df['Year'].unique().tolist()
    initial_state = states[0]
    initial_year = years[0]

    # (column to plot, suffix used in the trace name)
    metrics = (
        ('Annual_Emissions_CarbonDioxide_NonBiogenic', 'CO2 NB Biogenic per capita'),
        ('Annual_Emissions_CarbonDioxide_Biogenic', 'Biogenic CO2 per capita'),
    )

    # Add traces for each state and year, remembering which (state, year) each
    # trace belongs to so the dropdown masks can use exact matches instead of
    # substring checks on the trace names.
    trace_keys = []
    for state in states:
        for year in years:
            same_year = df['Year'] == year
            filtered_df = df[(df['State'] == state) & same_year]
            if filtered_df.empty:
                continue

            nearby_places = filtered_df['Nearby_Places'].values[0]
            if not pd.api.types.is_list_like(nearby_places):
                nearby_places = []
            nearby_df = df[df['State'].isin(nearby_places) & same_year]
            combined_df = pd.concat([filtered_df, nearby_df])

            for column, label in metrics:
                fig.add_trace(
                    go.Bar(
                        x=combined_df['Name'],
                        y=combined_df[column],
                        name=f"{state} {year} {label}",
                        visible=(state == initial_state and year == initial_year)
                    )
                )
                trace_keys.append((state, year))

    # Add dropdowns. For method 'update', args is
    # [trace-attribute updates, layout updates]; the first element must be the
    # dict itself, not a list wrapping it.
    state_buttons = [
        {
            'method': 'update',
            'label': state,
            'args': [
                {'visible': [key_state == state for key_state, _ in trace_keys]},
                {'title.text': f"State: {state}"}
            ]
        } for state in states
    ]

    year_buttons = [
        {
            'method': 'update',
            'label': str(year),
            'args': [
                {'visible': [key_year == year for _, key_year in trace_keys]},
                {'title.text': f"Year: {year}"}
            ]
        } for year in years
    ]

    fig.update_layout(
        updatemenus=[
            {
                'buttons': state_buttons,
                'direction': 'down',
                'showactive': True,
                'x': 0.1,
                'xanchor': 'left',
                'y': 1.1,
                'yanchor': 'top'
            },
            {
                'buttons': year_buttons,
                'direction': 'down',
                'showactive': True,
                'x': 0.3,
                'xanchor': 'left',
                'y': 1.1,
                'yanchor': 'top'
            }
        ],
        title=f"State: {initial_state}, Year: {initial_year}",
        xaxis_title="State",
        yaxis_title="Value",
        barmode='group'
    )

    return fig

# Create the plot from the sample frame `data_plot` built earlier in this cell
fig = create_plotly_visualization(data_plot)

# Show the plot (renders the interactive figure in the cell output)
fig.show()
