In [1]:
import pandas as pd

# Sample renewable energy source data
renewable_resources = ["solar", "wind", "hydro", "geothermal", "biomass"]

# Create a Series of renewable resources (pandas assigns the default 0..4 index)
resources_series = pd.Series(renewable_resources)

print("Renewable Energy Resources Series:")
print(resources_series)

# Sample green technology project data (for DataFrame).
# Each key becomes a column; the five lists are aligned by position,
# so entry i of every list describes the same project.
data = {
    "Project": ["Solar Farm A", "Wind Turbine X", "Hydro Plant Y", "Geothermal D", "Biomass E"],
    "Capacity_MW": [150, 300, 200, 50, 100],
    "Cost(Million $)": [200, 400, 350, 100, 250],
    # Last entry is the US state "Idaho" (it was previously misspelled).
    "Location": ["California", "Texas", "Washington", "Nevada", "Idaho"],
    "Completion_Year": [2023, 2024, 2022, 2025, 2023]
}

# Build the DataFrame from the column mapping and show it
projects_df = pd.DataFrame(data)
print("\nGreen Technology Projects DataFrame:")
print(projects_df)

Renewable Energy Resources Series:
0         solar
1          wind
2         hydro
3    geothermal
4       biomass
dtype: str

Green Technology Projects DataFrame:
          Project  Capacity_MW  Cost(Million $)    Location  Completion_Year
0    Solar Farm A          150              200  California             2023
1  Wind Turbine X          300              400       Texas             2024
2   Hydro Plant Y          200              350  Washington             2022
3    Geothermal D           50              100      Nevada             2025
4       Biomass E          100              250       Tdaho             2023


In [4]:
import pandas as pd

# Renewable energy source names, kept as a plain list first
renewable_resources = ["solar", "wind", "hydro", "geothermal", "biomass"]

# Wrap the list in a pandas Series
resources_series = pd.Series(renewable_resources)

print("Renewable Energy Resources Series:")
print(resources_series)

# Sample green technology project records: one tuple per project,
# with fields in the same order as `project_columns`.
# "Capacity (MW)" is in megawatts; "Cost (Million $)" is the project cost.
project_columns = (
    "Project",
    "Technology",
    "Capacity (MW)",
    "Cost (Million $)",
    "Location",
    "Completion Year",
)
project_rows = [
    ("Solar Farm A", "Solar", 150, 200, "California", 2023),
    ("Wind Turbine X", "Wind", 300, 400, "Texas", 2024),
    ("Hydropower Y", "Hydropower", 200, 350, "Washington", 2022),
    ("Solar Roof D", "Solar", 50, 100, "Nevada", 2025),
    ("Geothermal Plant E", "Geothermal", 100, 250, "Idaho", 2023),
]

# Transpose the records into a column-name -> list-of-values mapping
data = {
    column: list(values)
    for column, values in zip(project_columns, zip(*project_rows))
}

# Create a DataFrame for green technology projects
projects_df = pd.DataFrame(data)
print("\nGreen Technology Projects DataFrame:")
print(projects_df)

# Inspect the structure of the projects table: column dtypes, then size.

# Display data types of each column
print("\nData Types of DataFrame Columns:")
print(projects_df.dtypes)

# From now onwards, "features" means the columns in the data.

# .shape is a (rows, columns) tuple
print("\nSize of the DataFrame (rows, columns):")
print(projects_df.shape)


# A tour of everyday pandas operations on the projects table

# 1. Preview the top of the table
print("\nFirst 3 rows of the DataFrame:")
print(projects_df.head(n=3))

# 2. Descriptive statistics
print("\nSummary statistics for numeric columns:")
print(projects_df.describe())

# 3. Keep only the projects whose capacity exceeds 100 MW
is_high_capacity = projects_df["Capacity (MW)"].gt(100)
high_capacity = projects_df.loc[is_high_capacity]
print("\nProjects with Capacity > 100 MW:")
print(high_capacity)

# 4. Mean project cost within each technology group
cost_grouped_by_tech = projects_df.groupby("Technology")["Cost (Million $)"]
avg_cost_by_tech = cost_grouped_by_tech.mean()
print("\nAverage Cost by Technology:")
print(avg_cost_by_tech)

# 5. Order the rows by completion year (returns a new frame; projects_df is untouched)
sorted_df = projects_df.sort_values(by="Completion Year")
print("\nDataFrame sorted by Completion Year:")
print(sorted_df)

# 6. Derive cost per MW in dollars: convert millions of dollars to dollars, then
#    divide by capacity. The column is added to projects_df itself, so every
#    later use of projects_df sees it.
cost_in_dollars = projects_df["Cost (Million $)"] * 1_000_000
projects_df["Cost per MW ($)"] = cost_in_dollars / projects_df["Capacity (MW)"]
print("\nDataFrame with Cost per MW column:")
print(projects_df)

# 7. Merge with another DataFrame (example: adding environmental impact data)
# impact_df holds one row per technology and is joined onto the projects
# through the shared "Technology" column.
impact_data = {
    "Technology": ["Solar", "Wind", "Hydropower", "Geothermal"],
    "CO2 Saved (tons/year)": [50000, 75000, 60000, 40000],
    "Water Usage (gallons/MW)": [10, 5, 50, 20]
}
impact_df = pd.DataFrame(impact_data)

# Merge the DataFrames on 'Technology'.
# how="left" keeps every project row even when its technology has no impact entry.
# validate="many_to_one" makes pandas raise a MergeError if impact_df ever contains
# a duplicated Technology, instead of silently duplicating project rows and
# inflating the totals computed below.
merged_df = pd.merge(
    projects_df,
    impact_df,
    on="Technology",
    how="left",
    validate="many_to_one",
)
print("\nMerged DataFrame with Environmental Impact:")
print(merged_df)

# 8. Handle missing values (if any after merge)
# Projects without a matching technology get 0 in the impact columns.
# Reassign the result rather than mutating with inplace=True.
merged_df = merged_df.fillna({"CO2 Saved (tons/year)": 0, "Water Usage (gallons/MW)": 0})
print("\nDataFrame after filling missing values:")
print(merged_df)

# 9. Calculate total CO2 saved by all projects
# Each project row carries its technology's figure, so a technology with several
# projects contributes once per project.
# NOTE(review): confirm that per-project counting is the intended meaning of this total.
total_co2_saved = merged_df["CO2 Saved (tons/year)"].sum()
print(f"\nTotal CO2 Saved by all projects: {total_co2_saved} tons/year")

# 10. Group by Location and sum Capacity
capacity_by_location = merged_df.groupby("Location")["Capacity (MW)"].sum()
print("\nTotal Capacity by Location:")
print(capacity_by_location)

# 11. Write the merged table to an Excel workbook (row index omitted).
# If the export raises ImportError (openpyxl not installed), print an
# install hint instead of failing the cell.
try:
    merged_df.to_excel("green_projects_analysis.xlsx", index=False)
except ImportError:
    print("\nNote: Install openpyxl to export to Excel: pip install openpyxl")
else:
    # Reached only when the export completed without raising
    print("\nData exported to 'green_projects_analysis.xlsx'")

# 12. Simple data visualization (requires matplotlib)
# matplotlib is imported inside the try so the rest of the cell still runs
# (and prints an install hint) when the package is not available.
try:
    import matplotlib.pyplot as plt

    # Bar chart of Capacity by Technology.
    # Each chart gets its own explicitly created figure/axes, so it cannot land on
    # axes left over from earlier plotting and savefig targets a known figure
    # rather than whatever pyplot considers "current".
    tech_capacity = projects_df.groupby("Technology")["Capacity (MW)"].sum()
    bar_fig, bar_ax = plt.subplots()
    tech_capacity.plot(kind='bar', title='Total Capacity by Technology', ax=bar_ax)
    bar_ax.set_ylabel('Capacity (MW)')
    # Adjust margins so tick labels and axis titles fit inside the saved image
    bar_fig.tight_layout()
    bar_fig.savefig('capacity_by_tech.png')
    print("\nBar chart saved as 'capacity_by_tech.png'")

    # Scatter plot of Cost vs Capacity, on a separate figure
    scatter_fig, scatter_ax = plt.subplots()
    projects_df.plot.scatter(
        x='Capacity (MW)',
        y='Cost (Million $)',
        title='Cost vs Capacity',
        ax=scatter_ax,
    )
    scatter_fig.tight_layout()
    scatter_fig.savefig('cost_vs_capacity.png')
    print("Scatter plot saved as 'cost_vs_capacity.png'")

except ImportError:
    print("\nNote: Install matplotlib for visualization: pip install matplotlib")

Renewable Energy Resources Series:
0         solar
1          wind
2         hydro
3    geothermal
4       biomass
dtype: str

Green Technology Projects DataFrame:
              Project  Technology  Capacity (MW)  Cost (Million $)  \
0        Solar Farm A       Solar            150               200   
1      Wind Turbine X        Wind            300               400   
2        Hydropower Y  Hydropower            200               350   
3        Solar Roof D       Solar             50               100   
4  Geothermal Plant E  Geothermal            100               250   

     Location  Completion Year  
0  California             2023  
1       Texas             2024  
2  Washington             2022  
3      Nevada             2025  
4       Idaho             2023  

Data Types of DataFrame Columns:
Project               str
Technology            str
Capacity (MW)       int64
Cost (Million $)    int64
Location              str
Completion Year     int64
dtype: object

Size of the 