In [2]:
import pandas as pd

file_path = 'AirborneEmissions_Processed.xlsx'

excel_file = pd.ExcelFile(file_path)

for sheet_name in excel_file.sheet_names:
    sheet_df = pd.read_excel(file_path, sheet_name=sheet_name)
    num_columns = sheet_df.shape[1]
    print(f"Sheet Name: {sheet_name}, Number of Columns: {num_columns}")

Sheet Name: Arsenic, Number of Columns: 34
Sheet Name: Cadmium, Number of Columns: 34
Sheet Name: Chromium, Number of Columns: 34
Sheet Name: Copper, Number of Columns: 34
Sheet Name: Lead, Number of Columns: 34
Sheet Name: Mercury, Number of Columns: 34
Sheet Name: Nickel, Number of Columns: 34
Sheet Name: Selenium, Number of Columns: 34
Sheet Name: Vanadium, Number of Columns: 34
Sheet Name: Zinc, Number of Columns: 34


In [3]:
for sheet_name in excel_file.sheet_names:
    sheet_df = pd.read_excel(file_path, sheet_name=sheet_name)
    num_columns = sheet_df.shape[1]  
    empty_rows = sheet_df.isnull().all(axis=1)
    
    if empty_rows.any():
        print(f"Sheet Name: {sheet_name} has empty or null rows at indices: {sheet_df[empty_rows].index.tolist()}")
    else:
        print(f"Sheet Name: {sheet_name} has no empty or null rows.")

Sheet Name: Arsenic has no empty or null rows.
Sheet Name: Cadmium has no empty or null rows.
Sheet Name: Chromium has no empty or null rows.
Sheet Name: Copper has no empty or null rows.
Sheet Name: Lead has no empty or null rows.
Sheet Name: Mercury has no empty or null rows.
Sheet Name: Nickel has no empty or null rows.
Sheet Name: Selenium has no empty or null rows.
Sheet Name: Vanadium has no empty or null rows.
Sheet Name: Zinc has no empty or null rows.


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

file_path = 'AirborneEmissions_Processed.xlsx' 
excel_file = pd.ExcelFile(file_path)

num_simulations = 1000
future_years = np.arange(2023, 2031)  
output_file_path = 'predictions_mc.xlsx'  
images_output_dir = 'simulation_images' 

os.makedirs(images_output_dir, exist_ok=True)

all_simulation_results = {}

for sheet_name in excel_file.sheet_names:
    sheet_df = pd.read_excel(file_path, sheet_name=sheet_name)
    print(f"Processing sheet: {sheet_name}, Number of Columns: {sheet_df.shape[1]}")
    
    years = sheet_df.columns[1:]
    elements = sheet_df.iloc[:, 0]  
    yearly_change = sheet_df[years].pct_change(axis=1)
    
    simulation_results = pd.DataFrame()

    for element in elements:
        historical_emissions = sheet_df.loc[sheet_df.iloc[:, 0] == element, years].values.flatten()
        
        historical_changes = yearly_change.loc[sheet_df.iloc[:, 0] == element].dropna(axis=1).values.flatten()
        
        mean_change = np.mean(historical_changes)
        std_change = np.std(historical_changes)
        
        all_simulations = np.zeros((num_simulations, len(years) + len(future_years)))
        
        for sim in range(num_simulations):
            simulated_emissions = [historical_emissions[-1]]
            
            for _ in future_years:
                random_change = np.random.normal(mean_change, std_change)
                new_value = simulated_emissions[-1] * (1 + random_change)
                simulated_emissions.append(new_value)
            
            total_emissions = np.concatenate([historical_emissions, simulated_emissions[1:]])
            all_simulations[sim, :] = total_emissions
        
        mean_emissions = np.mean(all_simulations, axis=0)
        simulation_results[element] = mean_emissions
    
    all_years = np.concatenate([years, future_years])
    simulation_results.index = all_years
    all_simulation_results[sheet_name] = simulation_results
    
    plt.figure(figsize=(10, 6))
    for element in elements:
        plt.plot(all_years, simulation_results[element], label=element)
    
    plt.xlabel("Year")
    plt.ylabel("Emissions")
    plt.legend(loc="upper left")
    plt.title(f"Monte Carlo Simulation of Future Emissions ({sheet_name})")
    
    image_file_path = os.path.join(images_output_dir, f"{sheet_name}_simulation.png")
    plt.savefig(image_file_path)
    plt.close() 
    print(f"Graph saved for sheet '{sheet_name}' at {image_file_path}")

with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, simulation_result in all_simulation_results.items():
        simulation_result.to_excel(writer, sheet_name=sheet_name)

print(f"Monte Carlo simulation results saved to {output_file_path}")

Processing sheet: Arsenic, Number of Columns: 34
Graph saved for sheet 'Arsenic' at simulation_images\Arsenic_simulation.png
Processing sheet: Cadmium, Number of Columns: 34


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Graph saved for sheet 'Cadmium' at simulation_images\Cadmium_simulation.png
Processing sheet: Chromium, Number of Columns: 34
Graph saved for sheet 'Chromium' at simulation_images\Chromium_simulation.png
Processing sheet: Copper, Number of Columns: 34
Graph saved for sheet 'Copper' at simulation_images\Copper_simulation.png
Processing sheet: Lead, Number of Columns: 34
Graph saved for sheet 'Lead' at simulation_images\Lead_simulation.png
Processing sheet: Mercury, Number of Columns: 34


  x = asanyarray(arr - arrmean)


Graph saved for sheet 'Mercury' at simulation_images\Mercury_simulation.png
Processing sheet: Nickel, Number of Columns: 34
Graph saved for sheet 'Nickel' at simulation_images\Nickel_simulation.png
Processing sheet: Selenium, Number of Columns: 34
Graph saved for sheet 'Selenium' at simulation_images\Selenium_simulation.png
Processing sheet: Vanadium, Number of Columns: 34
Graph saved for sheet 'Vanadium' at simulation_images\Vanadium_simulation.png
Processing sheet: Zinc, Number of Columns: 34
Graph saved for sheet 'Zinc' at simulation_images\Zinc_simulation.png
Monte Carlo simulation results saved to predictions_mc.xlsx
