# Thermal efficiency

## Hypothesis
Different engine types have different thermal efficiencies.

With this visualization we seek to show which turbine generates the least CO2.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import altair as alt # easy visualizations

from functools import reduce

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input tree lazily and print the full path of every file found
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/sample_submission.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/engine_metadata.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/site_metadata.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_141.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_202.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_26.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_165.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_124.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_178.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_33.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_49.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data_79.csv
/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/data

## Loading Data

In [2]:
# Function to load a dataframe
def loading_df(df):
    """Read one CSV file from the dataset folder.

    Parameters
    ----------
    df : str
        Name of the file inside the dataset folder (despite the parameter
        name, this is a file name, not a dataframe).

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    csv_path = "/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/{}".format(df)
    return pd.read_csv(csv_path)

In [3]:
# Reading the two metadata tables that every later lookup is based on
engine_metadata, site_metadata = map(
    pd.read_csv,
    (
        "/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/engine_metadata.csv",
        "/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/site_metadata.csv",
    ),
)

In [4]:
# Joining engine and site metadata on customer and plant so that the FUEL_LHV
# of each data file can be looked up by its FILE_ID
new = pd.merge(
    engine_metadata,
    site_metadata,
    on=["CUSTOMER_NAME", "PLANT_NAME"],
)

# Visual inspection
new

Unnamed: 0,CUSTOMER_NAME,PLANT_NAME,ENGINE_ID,FILE_ID,LATITUDE,LONGITUDE,ELEVATION,FUEL_LHV
0,SPIFFY,SPIRITUAL-POLECAT,ENGINE_1,data_1.csv,61.170356,42.874767,112.000000,47331.28920
1,SPIFFY,SPIRITUAL-POLECAT,ENGINE_2,data_2.csv,61.170356,42.874767,112.000000,47331.28920
2,SPIFFY,SPIRITUAL-POLECAT,ENGINE_3,data_3.csv,61.170356,42.874767,112.000000,47331.28920
3,NONCHALANT,NIFTY-ROOK,ENGINE_1,data_4.csv,37.554516,49.908217,-29.000000,47358.23582
4,NONCHALANT,NIFTY-ROOK,ENGINE_2,data_5.csv,37.554516,49.908217,-29.000000,47358.23582
...,...,...,...,...,...,...,...,...
207,SCEPTICAL,GIGANTIC-EARWIG,ENGINE_2,data_209.csv,41.631139,55.981426,168.878861,47289.03400
208,MERCIFUL,PREMIUM-MOTH,ENGINE_1,data_210.csv,-26.175581,16.115096,1014.342346,47343.12242
209,MERCIFUL,PREMIUM-MOTH,ENGINE_2,data_211.csv,-26.175581,16.115096,1014.342346,47343.12242
210,MERCIFUL,PREMIUM-MOTH,ENGINE_3,data_212.csv,-26.175581,16.115096,1014.342346,47343.12242


# Creating the function to add the thermal efficiency column

Formula for thermal efficiency:
$$THRM\_EFF = \frac{POWER}{FUEL\_FLOW*FUEL\_LHV} * 100$$

In [5]:
# Function to create a column with thermal efficiency
def thermal_e_colum(file):
    """Load one engine data file and append its thermal efficiency column.

    The efficiency is computed row by row as
    ``POWER / (FUEL_FLOW * FUEL_LHV) * 100``, where ``FUEL_LHV`` is the value
    listed for ``file`` in the merged metadata table ``new``.

    Parameters
    ----------
    file : str
        File name as it appears in the ``FILE_ID`` column of ``new``
        (for example ``"data_1.csv"``).

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``loading_df(file)`` with an extra
        ``THERMAL_e`` column appended. Rows where ``POWER`` or ``FUEL_FLOW``
        is missing get ``NaN``.

    Raises
    ------
    ValueError
        If ``file`` does not match exactly one row of ``new``.
    """
    # Loading the dataframe
    data = loading_df(file)

    # Selecting the FUEL_LHV value of the corresponding file; exactly one
    # metadata row must match, otherwise the heating value is ambiguous
    matches = new.loc[new["FILE_ID"] == file, "FUEL_LHV"]
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one FUEL_LHV entry for {file!r}, found {len(matches)}"
        )
    fuel_lhv = float(matches.iloc[0])

    # The scalar broadcasts over every row, so the calculation works for a
    # file of any length (no helper columns or fixed row count needed)
    data["THERMAL_e"] = (data["POWER"] / (data["FUEL_FLOW"] * fuel_lhv)) * 100

    return data

## Testing the new formula

In [6]:
# Applying the thermal efficiency function to a single file ("data_1.csv")
thermal = thermal_e_colum("data_1.csv")

In [7]:
# Inspecting the frame that now carries the THERMAL_e column
thermal

Unnamed: 0.1,Unnamed: 0,CMP_SPEED,POWER,FUEL_FLOW,CO2,THERMAL_e
0,01/01/2021 00:00,7093.322089,9187.921323,0.663525,1.739632,29.255776
1,01/01/2021 01:00,7094.569052,9186.903547,0.663176,1.738718,29.267912
2,01/01/2021 02:00,7089.672026,9191.390126,0.663971,1.740801,29.247163
3,01/01/2021 03:00,7092.412620,9211.980770,0.665044,1.743615,29.265387
4,01/01/2021 04:00,7088.579389,9163.370698,0.662288,1.736389,29.232097
...,...,...,...,...,...,...
8756,31/12/2021 20:00,0.000000,,,,
8757,31/12/2021 21:00,0.000000,,,,
8758,31/12/2021 22:00,0.000000,,,,
8759,31/12/2021 23:00,0.000000,,,,


## Adapting the formula to automate the extraction of the average thermal efficiency

In [8]:
# Formula to add dataframes
# Including part of the logic of the previous function
def sum_df(*args):
    """Sum several engine data files and average their thermal efficiency.

    Each file is read with its first column as the index, missing values are
    replaced by 0, the index is parsed as day-first timestamps, and a
    ``THERMAL_e`` column is computed as
    ``POWER / (FUEL_FLOW * FUEL_LHV) * 100`` using the ``FUEL_LHV`` listed for
    that file in the merged metadata table ``new``.

    Parameters
    ----------
    *args : str
        File names as they appear in the ``FILE_ID`` column of ``new``.

    Returns
    -------
    tuple[pandas.DataFrame, float]
        The element-wise sum of all frames, aligned on the ``Date`` index and
        on column names (missing entries count as 0), and the mean of the
        per-file mean ``THERMAL_e`` values.

    Raises
    ------
    ValueError
        If no file name is given, or if a file does not match exactly one
        row of ``new``.
    """
    if not args:
        raise ValueError("sum_df needs at least one file name")

    # List for saving the dataframes to add
    dataframes = []

    # Loop for reading each file and adding the thermal efficiency column
    for arg in args:

        # Loading the file
        csv_file = pd.read_csv(f"/kaggle/input/0db0ed38-868c-40d2-bcbd-bcedb88543cf/dataset/dataset/{arg}", index_col=0)

        # Normalize the index name for all dataframes
        csv_file.index.name = "Date"

        # Changing NaN values to 0 to enable the sum
        csv_file = csv_file.fillna(0)

        # Changing the type of the index from object to datetime. The files
        # store timestamps as day/month/year, so dayfirst=True is required to
        # avoid reading e.g. 02/01/2021 as the 1st of February.
        csv_file.index = pd.to_datetime(csv_file.index, dayfirst=True)

        # Selecting the FUEL_LHV of the corresponding file; exactly one
        # metadata row must match, otherwise the heating value is ambiguous
        matches = new.loc[new["FILE_ID"] == arg, "FUEL_LHV"]
        if len(matches) != 1:
            raise ValueError(
                f"expected exactly one FUEL_LHV entry for {arg!r}, found {len(matches)}"
            )
        fuel_lhv = float(matches.iloc[0])

        # The scalar broadcasts over every row, so files of any length work
        csv_file["THERMAL_e"] = (csv_file["POWER"] / (csv_file["FUEL_FLOW"] * fuel_lhv)) * 100

        # Rows that were zero-filled above yield 0/0 = NaN here; they are
        # stored as 0 so the frames can be summed.
        # NOTE(review): these zero rows are also part of the mean below, so
        # hours without data lower the reported efficiency.
        dataframes.append(csv_file.fillna(0))

    # Mean thermal efficiency of each file, then the average of those means
    thermal_e = [float(df["THERMAL_e"].mean()) for df in dataframes]
    efficiency = sum(thermal_e) / len(thermal_e)

    # Adding the frames one by one until there are none left
    return reduce(lambda x, y: x.add(y, fill_value=0), dataframes), efficiency

In [9]:
# Function to load and sum all the data files of one group
def multitable_dataframe(df, group, selected_group):
    """Pass the files of one group to ``sum_df`` and return its result.

    The rows of ``df`` are grouped by the ``group`` column; the ``FILE_ID``
    values of the group labelled ``selected_group`` are handed to ``sum_df``
    as positional arguments. A label that is not present raises ``KeyError``.
    """
    file_ids = df.groupby(group)["FILE_ID"].get_group(selected_group)
    return sum_df(*file_ids.tolist())

In [10]:
# Loading the summed data and the mean efficiency for each of the eight engine types
(
    (engine1, efficiency1),
    (engine2, efficiency2),
    (engine3, efficiency3),
    (engine4, efficiency4),
    (engine5, efficiency5),
    (engine6, efficiency6),
    (engine7, efficiency7),
    (engine8, efficiency8),
) = (
    multitable_dataframe(engine_metadata, "ENGINE_ID", f"ENGINE_{number}")
    for number in range(1, 9)
)

In [11]:
# Building the table of mean thermal efficiency per engine type for the chart
efficiency_by_engine = {
    "Engine_1": efficiency1,
    "Engine_2": efficiency2,
    "Engine_3": efficiency3,
    "Engine_4": efficiency4,
    "Engine_5": efficiency5,
    "Engine_6": efficiency6,
    "Engine_7": efficiency7,
    "Engine_8": efficiency8,
}
source = pd.DataFrame(
    {
        "Engine_type": list(efficiency_by_engine.keys()),
        "value": list(efficiency_by_engine.values()),
    }
)

In [12]:
# Displaying the table that feeds the chart below
source

Unnamed: 0,Engine_type,value
0,Engine_1,22.289093
1,Engine_2,22.346286
2,Engine_3,23.168484
3,Engine_4,22.743465
4,Engine_5,23.043314
5,Engine_6,21.137872
6,Engine_7,20.557539
7,Engine_8,21.638046


## Thermal efficiency per engine type

In [13]:
# Arc (pie) chart: one slice per engine type, sized by its mean thermal efficiency
efficiency_arcs = alt.Chart(source).mark_arc()
efficiency_arcs.encode(
    theta=alt.Theta("value:Q"),
    color=alt.Color("Engine_type:N"),
)

## Conclusion

There is no significant difference in efficiency between engine types.

Although the total emissions data shows large differences between types of turbine, this may be due to a variation in the quantity of the type of engine in operation.