In [5]:
from pandas import DataFrame
from pandas import Series
import pandas as pd
import numpy as np
from scipy.stats import linregress
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

# Suppress specific warnings if necessary
# warnings.filterwarnings("ignore", category=DeprecationWarning)
# warnings.filterwarnings("ignore", category=FutureWarning)

# Set pandas option
pd.options.mode.copy_on_write = True

# Load data
meat_pop_df  = pd.read_csv("New Files/meat_totas.csv")
us_temps_df = pd.read_csv("New Files/global_temps_country.csv")
global_temps_df = pd.read_csv("New Files/global_temps.csv")
filtered_global_temp_df = pd.read_csv("New Files/global_temps_country_filtered.csv")
crop_production_df = pd.read_csv("New Files/crop_production.csv")
pork_prices_df = pd.read_csv("New Files/pork_prices.csv")
beef_prices_df = pd.read_csv("New Files/beef_prices.csv")
methane_levels_df = pd.read_csv("New Files/methane_levels_annual.csv")
greenhouse_gas_df = pd.read_csv("New Files/Greenhouse_gas.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'New Files/meat_totas.csv'

In [6]:
# Look at Data types and modify if necessary
meat_totals_df.dtypes

NameError: name 'meat_totals_df' is not defined

In [None]:
#Find total red meat overall and make new column
meat_pop_df["total_red_meat"] = meat_pop_df["total_red_commercial"]+meat_pop_df["total_red_federal"]

meat_pop_df.head()

In [None]:
#Look for only 1977 and after because null values before that
meat_pop_1977_df = meat_pop_df.loc[meat_pop_df["Year"]>=1977]

#Pull only columns needed
meat_pop_1977_df = meat_pop_1977_df[["Month", "Year", "total_red_commercial", "total_red_federal", "total_red_meat"]]

#Drop Null Values

meat_pop_1977_df = meat_pop_1977_df.dropna()

meat_pop_1977_df.head()

In [None]:
#Drop Null Values to convert Dtypes
us_temps_df = us_temps_df.dropna()
#Change Data Types
us_temps_df["month"] = us_temps_df["month"].astype("int64")
us_temps_df.dtypes

In [None]:
#Rename to match meat data frame
us_temps_df = us_temps_df.rename(columns={"year":"Year", "month":"Month"})

us_temps_df.dtypes
# #Filter to 1977
us_temps_1977_df = us_temps_df.loc[us_temps_df["Year"]>=1977]


#reduce
us_temps_1977_df = us_temps_1977_df[["Year", "Month", "AverageTemperature"]]
us_temps_1977_df.head()

In [None]:
#Merge!

us_temps_meat_df = pd.merge(meat_pop_1977_df, us_temps_1977_df, left_on = ["Year", "Month"], right_on = ["Year", "Month"], how = "inner")

us_temps_meat_df.head()

In [None]:
outliers_df = us_temps_meat_df.loc[us_temps_meat_df["total_red_meat"]>10000]

filtered_df = us_temps_meat_df[us_temps_meat_df["total_red_meat"]<=10000]

filtered_df.head()

In [None]:
#Find Averages of total_red_meat
average_df = filtered_df.groupby("Year")["total_red_meat"].mean()

average_df = pd.DataFrame(average_df)

average_df.head()

In [None]:
# production of red meat using seaborn

# Scatter plot with regression line 
sns.set(style="whitegrid")
plt.figure(figsize=(10, 6))
sns.scatterplot(data=average_df, x=average_df.index, y="total_red_meat", color="green", edgecolor="black")
sns.regplot(data=average_df, x=average_df.index, y="total_red_meat", scatter=False, color="red")

# Adding regression equation
x = average_df.index
y = average_df["total_red_meat"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'
plt.annotate(line_eq, xy=(0.05, 0.9), xycoords='axes fraction', fontsize=12, color='red')

# Adding labels and title
plt.xlabel("Year")
plt.ylabel("Weight (millions of pounds)")
plt.title("Red Meat Population by Year 1977 - 2024")

# Save the plot
# plt.savefig("red_meat_population_regression.png")

plt.tight_layout()
plt.show()

In [None]:
# production of red meat using plotly

# Scatter plot with regression line using Plotly Express
fig = px.scatter(average_df, x=average_df.index, y="total_red_meat", trendline="ols", 
                 trendline_color_override="red", labels={"index": "Year", "total_red_meat": "Weight (millions of pounds)"},
                 title="Red Meat Population by Year 1977 - 2024")
fig.update_traces(marker=dict(color='green', line=dict(color='black', width=1.5)))

# Display equation of the regression line
fig.add_annotation(
    x=0.05,
    y=0.9,
    xref="paper",
    yref="paper",
    text=f"y = {slope:.2f}x + {intercept:.2f}",
    showarrow=False,
    font=dict(color="red", size=12)
)

# Show the plot
fig.show()

# Save the plot as an image
# fig.write_image("red_meat_population_regression_plotly.png")

In [None]:
# Calculating US Avg temp Vs Red Meat population by Decade using seaborn
#Scatter plot
x = filtered_df["Year"]
y = filtered_df["AverageTemperature"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
fig, ax = plt.subplots()
plt.xlabel("Year")
plt.ylabel("Weight (millions of pounds)")
plt.title("US Temps vs Red Meat Population 1977 - 2024")
plt.scatter(x,y, marker="o", facecolors="green", edgecolors="black")
plt.plot(x, regress_values, "r-")
plt.annotate(line_eq, (1977, 55), fontsize=15, color="red")

# Save the plot
# plt.savefig("us_temps_vs_red_meat_population.png")

plt.show()

In [None]:
# Calculating US Avg temp Vs Red Meat population by Decade using seaborn

# Scatter plot with regression line using Seaborn
sns.set(style="whitegrid")
plt.figure(figsize=(10, 6))
sns.scatterplot(data=filtered_df, x="Year", y="AverageTemperature", color="green", edgecolor="black")

# Calculate regression line
x = filtered_df["Year"]
y = filtered_df["AverageTemperature"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept

# Plot regression line
plt.plot(x, regress_values, "r-")

# Adding regression equation
line_eq = f'y = {slope:.2f}x + {intercept:.2f}'
plt.annotate(line_eq, xy=(1977, 55), fontsize=12, color='red')

# Adding labels and title
plt.xlabel("Year")
plt.ylabel("Average Temperature (°F)")
plt.title("US Temperatures vs Red Meat Population 1977 - 2024")

plt.tight_layout()

# Save the plot
# plt.savefig("us_temps_vs_red_meat_population.png")

plt.show()

In [None]:
# Calculating the Average temp Vs Red Meat population by Decade using seaborn

# Calculate the decade for each year
filtered_df["Decade"] = (filtered_df["Year"] // 10) * 10

# Set the style
sns.set(style="whitegrid")

# Create a scatter plot with regression line for each decade
plt.figure(figsize=(12, 8))
for decade in filtered_df["Decade"].unique():
    decade_data = filtered_df[filtered_df["Decade"] == decade]
    sns.regplot(data=decade_data, x="Year", y="AverageTemperature", scatter=True, color="green", label=f"{decade}s")

# Adding labels and title
plt.xlabel("Year")
plt.ylabel("Average Temperature (°F)")
plt.title("US Temperatures vs Red Meat Population by Decade")

# Add legend
plt.legend()

# Save the plot
# plt.savefig("average_temperature_regression_per_decade.png")

plt.tight_layout()
plt.show()

In [None]:
# Calculating the Average temp Vs Red Meat population by Decade using plotly

# Calculate the decade for each year
filtered_df["Decade"] = (filtered_df["Year"] // 10) * 10

# Set the style
sns.set(style="whitegrid")

# Create a scatter plot with regression line for each decade
fig = px.scatter(filtered_df, x="Year", y="AverageTemperature", color="Decade", trendline="ols", 
                 labels={"Year": "Year", "AverageTemperature": "Average Temperature (°F)", "Decade": "Decade"},
                 title="US Temperatures vs Red Meat Population by Decade")
fig.update_traces(marker=dict(size=8))
fig.update_traces(line=dict(color='green', width=2))

# Update layout
fig.update_layout(
    legend_title_text="Decade",
    xaxis_title="Year",
    yaxis_title="Average Temperature (°F)",
)

# Show the plot
fig.show()

# Save the plot as an image
# fig.write_image("average_temperature_regression_per_decade_plotly.png")

In [None]:
# Calculating avergae temp per decade using seaborn boxplot

# Calculate the decade for each year
filtered_df["Decade"] = (filtered_df["Year"] // 10) * 10

# Set the style
sns.set(style="whitegrid")

# Create a box plot
plt.figure(figsize=(12, 8))
sns.boxplot(data=filtered_df, x="Decade", y="AverageTemperature", color="skyblue")

# Adding labels and title
plt.xlabel("Decade")
plt.ylabel("Average Temperature (°F)")
plt.title("Distribution of Average Temperatures by Decade")

# Save the plot
# plt.savefig("average_temperature_boxplot_per_decade.png")

plt.tight_layout()
plt.show()

In [None]:
# Calculate the decade for each year
filtered_df["Decade"] = (filtered_df["Year"] // 10) * 10

# Create the box plot using Plotly Express
fig = px.box(filtered_df, x="Decade", y="AverageTemperature", color="Decade",
             labels={"Decade": "Decade", "AverageTemperature": "Average Temperature (°F)"},
             title="Distribution of Average Temperatures by Decade")

# Update layout
fig.update_layout(
    xaxis_title="Decade",
    yaxis_title="Average Temperature (°F)",
)

# Show the plot
fig.show()

# Save the plot as an image
# fig.write_image("average_temperature_boxplot_per_decade_plotly.png")


In [None]:
#Clean Global Temps
#Drop Null
global_temps_df = global_temps_df.dropna()

global_temps_df.head()

In [None]:
july_df = global_temps_df.loc[global_temps_df["Month"]==7]
#july_df = july_df.sort_values(by="Year")

august_df = global_temps_df.loc[global_temps_df["Month"]==8]

june_df = global_temps_df.loc[global_temps_df["Month"]==6]

july_df.head()

In [None]:
# Land average temp for June, july August 
fig = plt.figure()

for frame in [june_df, july_df, august_df]:
    plt.plot(frame["Year"], frame["LandAverageTemperature"])

plt.xlabel("Year")
plt.ylabel("Land Average Temperature")
plt.title("Monthly Land Average Temperature Over the Years")
plt.legend(["June", "July", "August"])

# Save the plot
# plt.savefig("monthly_land_average_temperature.png")

plt.show()

In [None]:
# Land average temp for June, july August using Seaborn
# Concatenate the data frames and add a "Month" column to use seaborn 
june_df["Month"] = "June"
july_df["Month"] = "July"
august_df["Month"] = "August"
concatenated_df = pd.concat([june_df, july_df, august_df], ignore_index=True)

# Create the plot using Seaborn
plt.figure(figsize=(10, 6))
sns.lineplot(data=concatenated_df, x="Year", y="LandAverageTemperature", hue="Month", palette="muted")

# Adding labels and title
plt.xlabel("Year")
plt.ylabel("Land Average Temperature")
plt.title("Monthly Land Average Temperature Over the Years")

# Save the plot
# plt.savefig("monthly_land_average_temperature_seaborn.png")

plt.show()

In [None]:
# Land average temp for June, july August using plotly 

# Concatenate the data frames and add a "Month" column
june_df["Month"] = "June"
july_df["Month"] = "July"
august_df["Month"] = "August"
concatenated_df = pd.concat([june_df, july_df, august_df], ignore_index=True)

# Create the interactive plot using Plotly
fig = px.line(concatenated_df, x="Year", y="LandAverageTemperature", color="Month",
              title="Monthly Land Average Temperature Over the Years")
fig.update_xaxes(title="Year")
fig.update_yaxes(title="Land Average Temperature")

# Save the plot as an image file
# fig.write_image("monthly_land_average_temperature_plotly.png")

fig.show()

In [None]:
# Average temps accross the globe 
# Define the mapping for renaming the columns filtered_global_temp_df

new_column_names = {
    'dt': 'Date',
    'AverageTemperature': 'Avg_Temperature',
    'AverageTemperatureUncertainty' : 'Avg_temp_uncertainty',
    'Country': 'Country'
}

# Rename the columns
filtered_global_temp_df.rename(columns=new_column_names, inplace=True)


# Convert the 'Date' column to datetime format
filtered_global_temp_df['Date'] = pd.to_datetime(filtered_global_temp_df['Date'])

# Extract year and month from the 'Date' column
filtered_global_temp_df['Year'] = filtered_global_temp_df['Date'].dt.year
filtered_global_temp_df['Month'] = filtered_global_temp_df['Date'].dt.month

filtered_global_temp_df.head()

In [None]:
# Create the interactive plot using Plotly to compare countires temp by year grouped by decade 
fig = px.line(filtered_global_temp_df, x='Year', y='Avg_Temperature', color='Country', title='Average Temperature by Year and Country')
fig.update_xaxes(title='Year')
fig.update_yaxes(title='Average Temperature')

# Save the plot as an image file
# fig.write_image("average_temperature_by_year_and_country_plotly.png")

In [None]:
# Create a plot using Seaborn to compare countires temp by year grouped by decade 

# Set the style
sns.set(style="whitegrid")

# Create the line plot using Seaborn
plt.figure(figsize=(12, 6))
sns.lineplot(data=filtered_global_temp_df, x='Year', y='Avg_Temperature', hue='Country')

# Move the legend to the bottom
plt.legend(bbox_to_anchor=(1.05, 0), loc='lower left', borderaxespad=0.)

plt.xlabel('Year')
plt.ylabel('Average Temperature')
plt.title('Average Temperature by Year and Country')

# Save the plot as an image file
# plt.savefig("average_temperature_by_year_and_country_seaborn.png")

# Show the plot
plt.show()

In [None]:
# Define the mapping for renaming the columns  for crop_production_df.head()
new_column_names = {
    'index': 'Index',
    'LOCATION': 'Location',
    'INDICATOR': 'Indicator',
    'SUBJECT': 'Subject',
    'MEASURE': 'Measure',
    'FREQUENCY': 'Frequency',
    'TIME': 'Year',
    'VALUE': 'Value'
}

# Rename the columns
crop_production_df.rename(columns=new_column_names, inplace=True)

# Drop rows with NaN values
crop_production_df.dropna(inplace=True)

crop_production_df.head()

In [None]:
unique_location = crop_production_df['Location'].unique()
print(unique_location)

In [None]:
unique_subject = crop_production_df['Subject'].unique()
print(unique_subject)

In [None]:
# Define a function to remove outliers using IQR method
def remove_outliers_iqr(data, threshold=1.5):
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    iqr = q3 - q1
    lower_bound = q1 - threshold * iqr
    upper_bound = q3 + threshold * iqr
    return data[(data >= lower_bound) & (data <= upper_bound)]

# Apply the function to remove outliers from the 'Value' column
crop_production_df['Value'] = remove_outliers_iqr(crop_production_df['Value'])

In [None]:
# Define a color palette for each subject 
color_palette = {'RICE': 'blue', 'WHEAT': 'green', 'MAIZE': 'orange', 'SOYBEAN': 'red'}

# Filter data for each subject and create individual bar graphs
for subject in ['RICE', 'WHEAT', 'MAIZE', 'SOYBEAN']:
    subject_data = crop_production_df[crop_production_df['Subject'] == subject]
    
    # Create a bar graph for the current subject
    plt.figure(figsize=(10, 6))  # Adjust figure size as needed
    plt.bar(subject_data['Year'], subject_data['Value'], color=color_palette[subject])
    
    # Add labels and title
    plt.title(f"{subject} Production Over Years")
    plt.xlabel('Year')
    plt.ylabel('Production')
    plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
    
    # Show or save the plot
    plt.show()  
    
    # Save the plot as an image file
    # plt.savefig(f"{subject}_production_bar.png")  

In [None]:
    
# Define a color palette for each subject
color_palette = {'RICE': 'blue', 'WHEAT': 'green', 'MAIZE': 'orange', 'SOYBEAN': 'red'}

# Filter data for each subject and create individual bar charts
fig = go.Figure()

for subject in ['RICE', 'WHEAT', 'MAIZE', 'SOYBEAN']:
    subject_data = crop_production_df[(crop_production_df['Subject'] == subject) & (crop_production_df['Year'] >= 1990)]
    
    # Add a trace for the current subject
    fig.add_trace(go.Bar(x=subject_data['Year'], y=subject_data['Value'],
                         name=subject,
                         marker_color=color_palette[subject]))

# Update layout
fig.update_layout(title='Crop Production Over Years (Starting from 1990)',
                  xaxis_title='Year',
                  yaxis_title='Production',
                  barmode='group')  # To group bars for each year

# Show the plot
fig.show()

# Save the plot as an image file
# fig.write_image("crop_production_over_years_starting_from_1990_bar_plotly.# Production of each crop over the years using Plotlypng")

In [None]:
# using ploty to compare crop production by decade

# Define a color palette for each subject
color_palette = {'RICE': 'blue', 'WHEAT': 'green', 'MAIZE': 'orange', 'SOYBEAN': 'red'}

# Define a function to get decade from year
def get_decade(year):
    return str(year // 10 * 10) + "s"

# Apply the function to create a new column 'Decade'
crop_production_df['Decade'] = crop_production_df['Year'].apply(get_decade)

# Filter data for each subject and create individual bar graphs
for subject in ['RICE', 'WHEAT', 'MAIZE', 'SOYBEAN']:
    subject_data = crop_production_df[crop_production_df['Subject'] == subject]
    
    # Create a bar graph for the current subject using Plotly
    fig = px.bar(subject_data, x='Decade', y='Value', color='Decade', color_discrete_map=color_palette,
                 title=f"{subject} Production Over Decades", labels={'Decade': 'Decade', 'Value': 'Production'})
    
    # Show the plot
    fig.show()
    
    # Save the plot as an image file
    # fig.write_image(f"{subject}_production_bar_plotly.png")


In [None]:
# crop production after 1980..

# Define a color palette for each subject
color_palette = {'RICE': 'blue', 'WHEAT': 'green', 'MAIZE': 'orange', 'SOYBEAN': 'red'}

# Filter the data to include only years after 1980
crop_production_df_filtered = crop_production_df[crop_production_df['Year'] >= 1980]

# Create a single plot for all subjects
plt.figure(figsize=(12, 8))  # Adjust figure size as needed

# Group the filtered data by subject
grouped_data = crop_production_df_filtered.groupby('Subject')

# Iterate through each subject
for subject, group_data in grouped_data:
    # Plot the data for the current subject
    plt.bar(group_data['Year'], group_data['Value'], color=color_palette[subject], label=subject)

# Add labels and title
plt.title('Production by Subject')
plt.xlabel('Year')
plt.ylabel('Production')
plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
plt.legend()

# Show or save the plot
plt.show()  

# Save the combined plot as an image
# plt.savefig("combined_production_bar.png")

In [None]:
# Crop production by location and subject using 
# Define a color palette for each subject
color_palette = {'RICE': 'blue', 'WHEAT': 'green', 'MAIZE': 'orange', 'SOYBEAN': 'red'}

# Group the data by unique location and subject
grouped_data = crop_production_df.groupby(['Location', 'Subject'])

# Calculate the number of rows and columns for subplots
num_locations = len(crop_production_df['Location'].unique())
num_subjects = len(crop_production_df['Subject'].unique())
num_cols = min(num_subjects, 4)  
num_rows = - (- num_locations // num_cols)  

# Create subplots
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 5*num_rows))

# Flatten axes if there's only one row
if num_rows == 1:
    axes = axes[np.newaxis, :]

# Iterate through each group
for (location, subject), group_data in grouped_data:
    # Determine the subplot indices
    location_index = np.where(crop_production_df['Location'].unique() == location)[0][0]
    subject_index = np.where(crop_production_df['Subject'].unique() == subject)[0][0]
    
    # Plot the data for the current combination of location and subject
    ax = axes[location_index % num_rows, subject_index % num_cols]
    for label, df in group_data.groupby('Subject'):
        ax.bar(df['Year'], df['Value'], color=color_palette[label], label=label)
    
    # Add labels and title
    ax.set_title(f"Production in {location} - {subject}")
    ax.set_xlabel('Year')
    ax.set_ylabel('Production')
    ax.legend()

# Adjust layout
plt.tight_layout()

# Show or save the plot
plt.show()  

# Save the plot as an image file 
# plt.savefig("combined_production_bar.png")


In [None]:
# Crop production by Subject

# Determine the unique subjects and their count
unique_subjects = crop_production_df['Subject'].unique()
num_subjects = len(unique_subjects)

# Create subplots
fig, axes = plt.subplots(num_subjects, 1, figsize=(10, 6*num_subjects), sharex=True)

# Iterate through each subject
for i, subject in enumerate(unique_subjects):
    # Filter data for the current subject
    subject_data = crop_production_df[crop_production_df['Subject'] == subject]
    
    # Group the filtered data by year and location, then sum the production values for each location
    subject_production = subject_data.groupby(['Year', 'Location'])['Value'].sum().unstack()
    
    # Plot the production data for the current subject
    subject_production.plot(ax=axes[i], marker='o', linestyle='-')
    
    # Add labels and title
    axes[i].set_title(f"Production of {subject} by Country Over Years")
    axes[i].set_ylabel('Production')
    axes[i].legend(title='Location')

# Add common x-axis label
axes[-1].set_xlabel('Year')

# Adjust layout
# plt.tight_layout()

# Show or save the plot
plt.show()

# Save the plot as an image file 
# plt.savefig("Crop_production_by_country_over_years.png") 

In [None]:
# Crop production by Subject and location using seaborn 

# Set Seaborn style
sns.set_style("whitegrid")

# Create the plot using Seaborn's lineplot
plt.figure(figsize=(12, 8))
sns.lineplot(data=crop_production_df, x='Year', y='Value', hue='Location', style='Subject', markers=True, dashes=False)

# Add title and labels
plt.title('Production by Crops and Location Over Years')
plt.xlabel('Year')
plt.ylabel('Production')
plt.legend(title='Location')

# Show or save the plot
plt.show()

# Save the plot as an image file 
# plt.savefig("Crop_production_by_country_over_years_seaborn.png") 

In [None]:
# Load pork prices df
# Drop the 'Day' column
pork_prices_df.drop(columns=['Day'], inplace=True)

# Define the mapping for renaming the columns
new_column_names = {
    'Month': 'Month',
    'Year': 'Year',
    'Byproduct': 'Byproduct',
    'Gross_farm_value': 'Gross_Farm_Value',
    'Net_farm_value': 'Net_Farm_Value',
    'Wholesale_value': 'Wholesale_Value',
    'Retail_value': 'Retail_Value',
    'Total': 'Total',
    'Farm_wholesale': 'Farm_Wholesale',
    'Wholesale_retail': 'Wholesale_Retail'
}

# Rename the columns
pork_prices_df.rename(columns=new_column_names, inplace=True)

# Drop rows with NaN values
pork_prices_df.dropna(inplace=True)

# Display the modified DataFrame
pork_prices_df.head()

In [None]:
# Calculate mean, median, and standard deviation of 'Byproduct'
pork_mean_byproduct = pork_prices_df['Byproduct'].mean()
pork_median_byproduct = pork_prices_df['Byproduct'].median()
pork_std_byproduct = pork_prices_df['Byproduct'].std()

# Print the statistics
print("Statistics for Byproduct (Pork):")
print("Mean:", pork_mean_byproduct)
print("Median:", pork_median_byproduct)
print("Standard Deviation:", pork_std_byproduct)

In [None]:
# Load beef prices df

# Define the mapping for renaming the columns
new_column_names = {
    'DATE': 'Month',
    'Day': 'Day',  # Corrected column name
    'Unnamed: 2': 'Year',
    'Byproduct': 'Byproduct',
    'Gross_farm_value': 'Gross_Farm_Value',
    'Net_farm_value': 'Net_Farm_Value',
    'Wholesale_value': 'Wholesale_Value',
    'Retail_value': 'Retail_Value',
    'Total': 'Total',
    'Farm_wholesale': 'Farm_Wholesale',
    'Wholesale_retail': 'Wholesale_Retail',
    'All_fresh_beef_retail_value': 'Beef_Retail_Value'
}

# Rename the columns
beef_prices_df.rename(columns=new_column_names, inplace=True)

# Drop rows with NaN values
beef_prices_df.dropna(inplace=True)

# Display the modified DataFrame
beef_prices_df.head()

In [None]:
beef_mean_byproduct = beef_prices_df['Byproduct'].mean()
beef_median_byproduct = beef_prices_df['Byproduct'].median()
beef_std_byproduct = beef_prices_df['Byproduct'].std()

# Print the statistics
print("Statistics for Byproduct (Beef):")
print("Mean:", beef_mean_byproduct)
print("Median:", beef_median_byproduct)
print("Standard Deviation:", beef_std_byproduct)

In [None]:
import plotly.graph_objects as go

# Data
categories = ['Pork', 'Beef']
means = [5.935185185185184, 23.631050228310496]
medians = [5.5, 20.9]
std_devs = [2.057510754920059, 6.7940069358671495]

# Create the figure
fig = go.Figure()

# Add mean bars
fig.add_trace(go.Bar(
    x=categories,
    y=means,
    name='Mean',
    marker_color='skyblue'
))

# Add standard deviation error bars
fig.add_trace(go.Scatter(
    x=categories,
    y=means,
    error_y=dict(
        type='data',
        array=std_devs,
        visible=True
    ),
    mode='markers',
    name='Standard Deviation',
    marker=dict(color='black')
))

# Add median points
fig.add_trace(go.Scatter(
    x=categories,
    y=medians,
    mode='markers',
    name='Median',
    marker=dict(color='red', size=10)
))

# Update layout
fig.update_layout(
    title='Statistics for Byproduct (Pork and Beef)',
    xaxis_title='Byproduct',
    yaxis_title='Value',
    barmode='group'
)

# Save the plot as an image file
# fig.write_image("statistics_plotly.png")

# Show the plot
fig.show()


In [None]:
# Define the mapping for renaming the columns  for methane_levels
methane_levels_df.shape
methane_levels_df.head()

In [None]:
# Sort the methane_levels DataFrame by mean methane levels to get the lowest and highest 10 years
sorted_methane_levels = methane_levels_df.sort_values(by='mean')

# Select the lowest 10 years
lowest_df = sorted_methane_levels.head(10)[['year', 'mean']]
lowest_df['category'] = 'Lowest'

# Select the highest 10 years
highest_df = sorted_methane_levels.tail(10)[['year', 'mean']]
highest_df['category'] = 'Highest'

# Concatenate both DataFrames
combined_df = pd.concat([lowest_df, highest_df])

# Reset index
combined_df.reset_index(drop=True, inplace=True)

# Print the combined DataFrame
combined_df

In [None]:
# Create figure and axes
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Plot lowest 10 years
ax1.barh(lowest_10_years['year'], lowest_10_years['mean'], color='skyblue')
ax1.set_xlabel('Mean Methane Levels')
ax1.set_ylabel('Year')
ax1.set_title('Top 10 Years with Lowest Mean Methane Levels')

# Plot highest 10 years
ax2.barh(highest_10_years['year'], highest_10_years['mean'], color='salmon')
ax2.set_xlabel('Mean Methane Levels')
ax2.set_ylabel('Year')
ax2.set_title('Top 10 Years with Highest Mean Methane Levels')

# Adjust layout
plt.tight_layout()

# Save the plot as a PNG file
# plt.savefig('methane_levels_side_by_side.png')

# Show plot
plt.show()

In [None]:
# Sort the methane_levels DataFrame by mean methane levels in ascending order to get the lowest 10 years using plotly
lowest_10_years = methane_levels_df.sort_values(by='mean').head(10)

# Create the figure
fig_lowest = go.Figure()

# Add bar trace for lowest 10 years
fig_lowest.add_trace(go.Bar(
    y=lowest_10_years['year'],
    x=lowest_10_years['mean'],
    orientation='h',
    name='Lowest 10 Years',
    marker=dict(color='skyblue')
))

# Update layout
fig_lowest.update_layout(
    title='Top 10 Years with Lowest Mean Methane Levels',
    xaxis_title='Mean Methane Levels',
    yaxis_title='Year',
    height=600,
    width=1000
)

# Save the plot as an image file
# fig_lowest.write_image("top_10_lowest_methane_levels.png")



In [None]:
# Sort the methane_levels DataFrame by mean methane levels in descending order to get the highest 10 years using plotly
highest_10_years = methane_levels_df.sort_values(by='mean', ascending=False).head(10)

# Create the figure
fig_highest = go.Figure()

# Add bar trace for highest 10 years
fig_highest.add_trace(go.Bar(
    y=highest_10_years['year'],
    x=highest_10_years['mean'],
    orientation='h',
    name='Highest 10 Years',
    marker=dict(color='salmon')
))

# Update layout
fig_highest.update_layout(
    title='Top 10 Years with Highest Mean Methane Levels',
    xaxis_title='Mean Methane Levels',
    yaxis_title='Year',
    height=600,
    width=1000
)

# Save the plot as an image file
# fig_highest.write_image("top_10_highest_methane_levels.png")


In [None]:
# Define the mapping for renaming the columns  for 
greenhouse_gas_df.shape

# Rename the first column to 'state'
greenhouse_gas_df.rename(columns={'Greenhouse Gas Gross Total, MMT CO2 eq.': 'State'}, inplace=True)

# Drop any rows with null values
greenhouse_gas_df.dropna(inplace=True)

greenhouse_gas_df.head()


In [None]:
greenhouse_gas_df.columns 

In [None]:
greenhouse_gas_df['State'].unique()

In [None]:
# Sort the DataFrame by carbon emissions in descending order
sorted_df = greenhouse_gas_df.sort_values(by='1990', ascending=False)

# Select the top 5 states with the highest carbon emissions
top_5_states_highest_emissions = sorted_df.head(5)['State'].tolist()

# Select the bottom 5 states with the lowest carbon emissions
bottom_5_states_lowest_emissions = sorted_df.tail(5)['State'].tolist()

# Print the top 5 states with the highest carbon emissions
print("Top 5 states with the highest carbon emissions:")
print(top_5_states_highest_emissions)

# Print the bottom 5 states with the lowest carbon emissions
print("\nBottom 5 states with the lowest carbon emissions:")
print(bottom_5_states_lowest_emissions)

In [None]:
# Sort the DataFrame by carbon emissions in descending order
sorted_df = greenhouse_gas_df.sort_values(by='1990', ascending=False)

# Select the top 5 states with the highest carbon emissions and their corresponding emissions values
top_5_states_highest_emissions = sorted_df.head(5)[['State', '1990']].values

# Select the bottom 5 states with the lowest carbon emissions and their corresponding emissions values
bottom_5_states_lowest_emissions = sorted_df.tail(5)[['State', '1990']].values


In [None]:
# create df to use for plotting 
top_5_states_highest_emissions_data = [
    ['Texas', 769.4285588063276],
    ['California', 439.39811845295],
    ['Pennsylvania', 303.25183939006087],
    ['Ohio', 301.03097232999573],
    ['Illinois', 253.8883962714412]
]

bottom_5_states_lowest_emissions_data = [
    ['District Of Columbia', 4.9082174496745],
    ['Vermont', 7.649251162936002],
    ['Rhode Island', 10.2410289278132],
    ['New Hampshire', 16.8721642445757],
    ['Delaware', 19.7130992749581]
]

# Create DataFrames
top_5_states_df = pd.DataFrame(top_5_states_highest_emissions_data, columns=['State', 'Emissions'])
bottom_5_states_df = pd.DataFrame(bottom_5_states_lowest_emissions_data, columns=['State', 'Emissions'])

# Displaying DataFrames
print("\nBottom 5 states with the lowest carbon emissions:")
bottom_5_states_df

In [None]:
# Displaying DataFrames
print("Top 5 states with the highest carbon emissions:")
top_5_states_df

In [None]:
# Plot pie chart for top 5 highest CO2 emissions
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.pie(top_5_states_df['Emissions'], labels=top_5_states_df['State'], autopct='%1.1f%%', startangle=140)
plt.title('Top 5 States with Highest CO2 Emissions')

# Plot pie chart for top 5 lowest CO2 emissions
plt.subplot(1, 2, 2)
plt.pie(bottom_5_states_df['Emissions'], labels=bottom_5_states_df['State'], autopct='%1.1f%%', startangle=140)
plt.title('Top 5 States with Lowest CO2 Emissions')

plt.tight_layout()

# Save the plot as an image
# plt.savefig('top_5_states_CO2_emissions_pie_chart.png')

plt.show()


In [None]:
# Plot pie chart for top 5 highest CO2 emissions using plotly 
top_5_states_df = pd.DataFrame(top_5_states_highest_emissions_data, columns=['State', 'Emissions'])

# Plot pie chart for top 5 highest CO2 emissions using plotly
fig1 = go.Figure(data=[go.Pie(labels=top_5_states_df['State'], values=top_5_states_df['Emissions'], hole=.3)])
fig1.update_layout(title_text='Top 5 States with Highest CO2 Emissions')

# Save the figure as an image
# fig1.write_image("top_5_highest_CO2_emissions.png")

In [None]:
# Plot pie chart for top 5 lowest CO2 emissions using plotly
bottom_5_states_df = pd.DataFrame(bottom_5_states_lowest_emissions_data, columns=['State', 'Emissions'])

# Plot pie chart for top 5 lowest CO2 emissions using plotly
fig2 = go.Figure(data=[go.Pie(labels=bottom_5_states_df['State'], values=bottom_5_states_df['Emissions'], hole=.3)])
fig2.update_layout(title_text='Top 5 States with Lowest CO2 Emissions')

# Save the figure as an image
# fig2.write_image("top_5_lowest_CO2_emissions.png")
