Pandas

In [None]:
import pandas as pd

# Renewable energy source categories, wrapped in a one-dimensional pandas Series
renewable_sources = ["Solar", "Wind", "Hydropower", "Geothermal", "Biomass"]
renewable_series = pd.Series(renewable_sources)

# Sample green technology projects, written one record per project.
# Field order matches `project_fields` below; capacity is in megawatts (MW)
# and cost is the project cost in million $.
project_fields = [
    "Project",
    "Technology",
    "Capacity (MW)",
    "Cost (Million $)",
    "Location",
    "Completion Year",
]
project_records = [
    ("Solar Farm A", "Solar", 150, 200, "California", 2023),
    ("Wind Turbine X", "Wind", 300, 400, "Texas", 2024),
    ("Hydropower Y", "Hydropower", 200, 350, "Washington", 2022),
    ("Solar Roof Z", "Solar", 50, 100, "Nevada", 2025),
    ("Geothermal Plant P", "Geothermal", 100, 250, "Idaho", 2023),
]

# Pivot the records into the column-oriented mapping (column name -> list of
# values) that is later passed to pd.DataFrame
data = {
    field: list(values)
    for field, values in zip(project_fields, zip(*project_records))
}

print("Renewable Energy Sources:")
print(renewable_series)

In [None]:
# Build the projects table from the column-oriented `data` mapping
# (each key becomes a column, each list supplies that column's values)
project_df = pd.DataFrame.from_dict(data)

print("\nGreen Technology Projects DataFrame:")
# .head() returns the first rows of a DataFrame (5 unless told otherwise);
# as the last expression of the cell, the result is displayed as a table
project_df.head(n=5)

In [None]:
# Show the last 3 rows of the table
project_df.tail(3)

In [None]:
# Row slicing: an integer slice inside [] selects rows by position,
# so 2:4 returns the rows at positions 2 and 3 (the end bound is excluded)
project_df[2:4]

In [None]:
# Data type (dtype) of each column in the DataFrame
project_df.dtypes

In [None]:
# Dimensions of the DataFrame as a (number of rows, number of columns) tuple
project_df.shape

In [None]:
# Column labels of the DataFrame
project_df.columns

In [None]:
# Concise summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage
project_df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
project_df.describe()

In [None]:
# Selecting a single column with [] returns a pandas Series, not a DataFrame
type(project_df["Project"])

In [None]:
# Passing a list of column labels returns a DataFrame containing
# only those columns, for every row
project_df.loc[:, ["Project", "Capacity (MW)"]]

In [None]:
# Position-based selection with .iloc: the first three rows (positions 0-2)
# and the columns at positions 1 and 4
project_df.iloc[0:3, [1, 4]]

In [None]:
# Filter projects with capacity greater than 100 MW: build a boolean mask
# (True where the capacity exceeds 100) and keep only the matching rows
high_capacity_project = project_df.loc[project_df["Capacity (MW)"].gt(100)]

print("\nProject with Capacity Greater than 100 MW:", high_capacity_project, sep="\n")

In [None]:
# Feature engineering: derive a new column from existing ones.
# Cost per MW is the project cost (million $) divided element-wise
# by the project capacity (MW).
project_df["Cost per MW"] = project_df["Cost (Million $)"].div(project_df["Capacity (MW)"])

print("\nDataFrame with Cost per MW:")
project_df.head(n=5)

In [None]:
# Count the missing (null) values in each column; all zeros means no missing data
project_df.isnull().sum()

Aggregating and grouping data:

In [None]:
# Aggregate across all projects: sum the capacity column and the cost column
total_capacity = project_df.loc[:, "Capacity (MW)"].sum()
total_cost = project_df.loc[:, "Cost (Million $)"].sum()

print(
    f"\nTotal Capacity of all projects: {total_capacity} MW",
    f"Total Cost of all projects: ${total_cost} million",
    sep="\n",
)

In [None]:
# Group the capacity values by the technology of each project and add them up,
# giving one total capacity per technology type
grouped_data = project_df["Capacity (MW)"].groupby(project_df["Technology"]).sum()

print("\nTotal Capacity by Technology:", grouped_data, sep="\n")