<a href="https://colab.research.google.com/github/NiloofarSoltani-2/Pescara_Energy_Consumption/blob/main/Pescara_Energy_Consumption.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd

# Load every Excel export found in the MSU directory and stack them into one frame.
msu_directory = '/content/msu/'

# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden/lock entries (e.g. `.ipynb_checkpoints`, `~$report.xlsx`) that
# pd.read_excel cannot open. Keep regular, non-hidden files only and sort them
# so the concatenated row order is reproducible across runs.
msu_excel_files = sorted(
    file for file in os.listdir(msu_directory)
    if os.path.isfile(os.path.join(msu_directory, file))
    and not file.startswith(('.', '~$'))
)

msu_dfs = []
for file in msu_excel_files:
    file_path = os.path.join(msu_directory, file)
    df = pd.read_excel(file_path)
    msu_dfs.append(df)

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
msu_df = pd.concat(msu_dfs, ignore_index=True)

msu_df.head()

Unnamed: 0,Sem.,Legenda,Linea di produzione,Materiale,Versione di prod.,Data prod.,Turno,%TSR,% Scarto,% Sched. Util.,...,Pz / min,PZ Buoni in UMA,UMA,PZ Buoni,PZ Totali,PZ Scarto,TN (min),PZ/CAR,PZ Totali in UMA,PZ Scarto in UMA
0,,Tot.Gio/Art,CPEMA01,2741031.0,,2023-10-25,,83.333,0.0,66.667,...,145.0,0.625,MSU,116000,116000,0,960,0,0.625,0.0
1,,Tot.Gio/Art,CPEMA01,2741031.0,,2023-10-26,,75.0,0.0,66.667,...,145.0,0.5625,MSU,104400,104400,0,960,0,0.563,0.0
2,,Tot.Gio/Art,CPEMA01,2741031.0,,2023-10-27,,58.333,0.0,66.667,...,145.0,0.4375,MSU,81200,81200,0,960,0,0.438,0.0
3,,Tot.Gio/Art,CPEMA01,2741031.0,,2023-10-30,,50.0,0.0,33.333,...,145.0,0.1875,MSU,34800,34800,0,480,0,0.188,0.0
4,,Tot.Gio/Art,CPEMA01,2741031.0,,2023-10-31,,66.667,0.0,66.667,...,145.0,0.5,MSU,92800,92800,0,960,0,0.5,0.0


In [None]:
import sqlite3

# Load the raw MSU frame into an in-memory SQLite database for ad-hoc SQL.
conn = sqlite3.connect(':memory:')
msu_df.to_sql('msu', conn, index=False, if_exists='replace')

# Count the dated rows per production line.
# The date column is named "Data prod." (lower-case p, trailing dot). The
# previous spelling "Data Prod" matched no column, so SQLite fell back to
# treating it as a string literal and the `is not null` filter was always true.
query = """
SELECT "Linea di produzione", count(*)
FROM msu
where "Data prod." is not null
group by 1
order by 1
"""
result = pd.read_sql_query(query, conn)

result.head(100)

Unnamed: 0,Linea di produzione,count(*)
0,,32
1,CPEMA01,501
2,DIFA-101,3564
3,DIFA-102,6542
4,DIFA-103,5732
5,DIFA-104,5797
6,DIFA-105,5112
7,DIFA-107,1401
8,FAX15,2802
9,FGC1,4277


In [None]:
# important columns:
# Linea di produzione: (production_line)
# Data prod: (date)
# TN (min): (production_time_in_minutes)
# PZ Totali in UMA: (total_products_produced)

# data cleaning:
# rename all the columns to english names without spaces
# filter these production lines: FNL8, FNL13, FAX15, CPEMA01
# date: should not be null
# production_time_in_minutes needs no cleaning (range:0 to 999)
# total_products_produced: replace negative values with zero

In [None]:
# Build the cleaned MSU frame: English column names, unwanted lines removed,
# valid dates, non-negative production time and non-negative production totals.

# Production lines removed by the filter below.
# NOTE(review): the earlier comment on this step said "keep only" these lines,
# but the code has always used `~isin`, i.e. it drops them. That behaviour is
# preserved here; confirm which one is actually intended.
excluded_lines = ['FNL8', 'FNL13', 'FAX15', 'CPEMA01']

# Rename the columns of interest to English names without spaces.
# rename() returns a new frame, so msu_df itself is left untouched.
msu_cleaned = msu_df.rename(columns={
    'Linea di produzione': 'production_line',
    'Data prod.': 'date',
    'TN (min)': 'production_time_in_minutes',
    'PZ Totali in UMA': 'total_products_produced'
})

# Every row filter is followed by an explicit .copy() so that the column
# assignments below write to an independent frame instead of a slice
# (this is what triggered pandas' SettingWithCopyWarning before).
msu_cleaned = msu_cleaned[~msu_cleaned['production_line'].isin(excluded_lines)].copy()

# Normalise line codes by stripping every non-alphanumeric character
# (e.g. 'DIFA-102' becomes 'DIFA102').
msu_cleaned['production_line'] = msu_cleaned['production_line'].str.replace('[^A-Za-z0-9]', '', regex=True)

# Remove rows where the date is null, then make sure the column is datetime.
msu_cleaned = msu_cleaned.dropna(subset=['date']).copy()
msu_cleaned['date'] = pd.to_datetime(msu_cleaned['date'], format='%d/%m/%Y')

# Convert 'production_time_in_minutes' to numeric, coercing errors to NaN.
msu_cleaned['production_time_in_minutes'] = pd.to_numeric(msu_cleaned['production_time_in_minutes'], errors='coerce')

# Keep rows with a non-negative production time; NaN (unparseable) values
# fail the comparison and are dropped as well.
msu_cleaned = msu_cleaned[msu_cleaned['production_time_in_minutes'] >= 0].copy()

# Replace negative values in total_products_produced with zero (vectorised).
msu_cleaned['total_products_produced'] = msu_cleaned['total_products_produced'].clip(lower=0)

msu_cleaned = msu_cleaned[['production_line', 'date', 'production_time_in_minutes', 'total_products_produced']]

# Display the cleaned data
msu_cleaned.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msu_cleaned['total_products_produced'] = msu_cleaned['total_products_produced'].apply(lambda x: max(x, 0))


Unnamed: 0,production_line,date,production_time_in_minutes,total_products_produced
72,DIFA102,2023-09-28,915.0,1.89
73,DIFA102,2023-10-27,0.0,0.0
74,DIFA102,2023-07-03,1355.0,4.831
75,DIFA102,2023-07-04,193.0,0.863
76,DIFA102,2023-07-29,597.0,1.385


In [None]:
# Load every consumption Excel export and stack them into one long frame with
# the columns: Tempo, consumption_unit, consumption_kwh.
consumption_directory = '/content/consumption/'

# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden/lock entries (e.g. `.ipynb_checkpoints`, `~$report.xlsx`) that
# pd.read_excel cannot open. Keep regular, non-hidden files only and sort them
# so the concatenated row order is reproducible across runs.
consumption_excel_files = sorted(
    file for file in os.listdir(consumption_directory)
    if os.path.isfile(os.path.join(consumption_directory, file))
    and not file.startswith(('.', '~$'))
)

consumption_dfs = []
for file in consumption_excel_files:
    file_path = os.path.join(consumption_directory, file)
    df = pd.read_excel(file_path)
    # The header of the second column carries the meter name followed by a
    # 6-character suffix that is cut off here.
    # NOTE(review): presumably a unit suffix such as " [kWh]" - confirm.
    value_column = df.columns[1]
    df = df.rename(columns={value_column: 'consumption_kwh'})
    df['consumption_unit'] = value_column[:-6]
    df = df[['Tempo', 'consumption_unit', 'consumption_kwh']]
    consumption_dfs.append(df)

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
consumption_df = pd.concat(consumption_dfs, ignore_index=True)

consumption_df.head(10)

Unnamed: 0,Tempo,consumption_unit,consumption_kwh
0,2018-07-01 00:00:00,EATT_QSP2,2367.76
1,2018-07-02 00:00:00,EATT_QSP2,3518.22
2,2018-07-03 00:00:00,EATT_QSP2,3519.36
3,2018-07-04 00:00:00,EATT_QSP2,3517.13
4,2018-07-05 00:00:00,EATT_QSP2,3525.56
5,2018-07-06 00:00:00,EATT_QSP2,3529.62
6,2018-07-07 00:00:00,EATT_QSP2,3542.56
7,2018-07-08 00:00:00,EATT_QSP2,2347.2
8,2018-07-09 00:00:00,EATT_QSP2,3505.55
9,2018-07-10 00:00:00,EATT_QSP2,3538.98


In [None]:
import sqlite3

# Fresh in-memory database that holds only the raw consumption readings.
conn = sqlite3.connect(':memory:')
consumption_df.to_sql('consumption', conn, index=False, if_exists='replace')

# Row count per consumption unit, ordered by unit name; the query itself caps
# the listing at 67 units.
query = """
SELECT consumption_unit, count(*) as cnt
FROM consumption
group by 1
order by 1
limit 67
"""
result = pd.read_sql_query(query, conn)

# Only the unit names are needed from here on.
consumption_units = result['consumption_unit'].to_list()
print(consumption_units)

['DELTA_Cab_B', 'Delta_Cab_A', 'Delta_Cab_C', 'EATT_CAMFIL_2', 'EATT_CAMFIL_5', 'EATT_CENT_TERMICA', 'EATT_CHILLER_1', 'EATT_CHILLER_1_C', 'EATT_CHILLER_2', 'EATT_CHILLER_3_LOTTO_C', 'EATT_CHILLER_4_LOTTO_C', 'EATT_COMPRESSORI', 'EATT_COMPR_VUOTO', 'EATT_COV', 'EATT_CTRL_FREDDO', 'EATT_CTRL_IDRICA', 'EATT_DRY_FILTRATION', 'EATT_FAX_15', 'EATT_FGC_5', 'EATT_FIX_2', 'EATT_FIX_3', 'EATT_FIX_4', 'EATT_FIX_5', 'EATT_FNL_3', 'EATT_FNL_8', 'EATT_GEN_LOTTO_B', 'EATT_GLICOLE', 'EATT_GLOBAL_2', 'EATT_GLOBAL_3', 'EATT_GLOBAL_4', 'EATT_GLOBAL_5', 'EATT_GLOBAL_7 (EX GLOBAL1)', 'EATT_HYBRID', 'EATT_IDROFILTRO_1C', 'EATT_IDROFILTRO_2C', 'EATT_IDROFILTRO_4', 'EATT_INGEGNERIA', 'EATT_LGV', 'EATT_LUCI_LOTTO_B', 'EATT_MAG_PROD_FINITO_1', 'EATT_MAG_PROD_FINITO_2', 'EATT_MAG_PROD_FINITO_3', 'EATT_OSPREY3+CAMFIL3', 'EATT_POS_ID10_SEP_C', 'EATT_PRIMAVERA', 'EATT_QD01C_NORMALE', 'EATT_QD01C_PREFERENZIALE', 'EATT_QD02C_NORMALE', 'EATT_QD02C_PREFERENZIALE', 'EATT_QSP1', 'EATT_QSP2', 'EATT_QSP3_OSPREY1', 'EATT_Q

In [None]:
# data cleaning:
# rename columns to english: Tempo: date
# remove non-date values from Tempo column, and convert it to datetime afterwards

In [None]:
# Rows whose 'Tempo' value can be parsed as a date/time; anything that cannot
# be parsed becomes NaT under errors='coerce' and is filtered out.
has_valid_date = pd.to_datetime(consumption_df['Tempo'], errors='coerce').notna()

# Boolean .loc selection and rename() both return new frames, so
# consumption_df itself is left untouched.
cleaned_consumption = consumption_df.loc[has_valid_date].rename(columns={'Tempo': 'date'})

# NOTE(review): the validity check above parses without an explicit format
# while this conversion uses '%d/%m/%Y' - confirm both agree for string values.
cleaned_consumption['date'] = pd.to_datetime(cleaned_consumption['date'], format='%d/%m/%Y')

cleaned_consumption.head()

Unnamed: 0,date,consumption_unit,consumption_kwh
0,2018-07-01,EATT_QSP2,2367.76
1,2018-07-02,EATT_QSP2,3518.22
2,2018-07-03,EATT_QSP2,3519.36
3,2018-07-04,EATT_QSP2,3517.13
4,2018-07-05,EATT_QSP2,3525.56


In [None]:
# Production line codes grouped under a production-unit ("lotto") label.
# NOTE(review): "FAX15" is listed under both "Lotto_AC (Lotto B)" and "Others",
# and the DIFA/MAFA codes keep their hyphen here ("DIFA-102") while the cleaned
# MSU data and `utilities_mapping` use the hyphen-less form ("DIFA102").
# Confirm both points before joining on these values.
production_units = {
    "Lotto_FC (Lotto C)": ["FNL3", "FGC1", "FGC2", "FGC3", "FGC5", "HYB1", "PVA1"],
    "Lotto_BC (Lotto A)": ["DIFA-102", "DIFA-103", "DIFA-104", "DIFA-105", "DIFA-107", "WIFA106"],
    "Lotto_AC (Lotto B)": ["FIX2", "FIX3", "FIX5", "FAX15", "FIX4"],
    "Others": [
        "CPEMA01", "DIFA-101", "FAX15", "FGC4", "FIX1",
        "FNL8", "FNL13", "MAFA-101", "MAFA-102",
    ],
}

In [None]:
utilities_mapping = {
    "EATT_CAMFIL_2": {
        "production_lines": [
            'FAX15',
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            "FNL3",
            "HYB",
            "PVA",
        ],
        "production_unit": ["lotto_C","lotto_B"],
    },
    "EATT_CAMFIL_5": {
        "production_lines": ["DIFA107"],
        "production_unit": "lotto_A"
    },
    "EATT_COV": {
        "production_lines": [
            'FAX15',
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FIX2",
            "FIX3",
            "FIX5",
            "FNL3",
            "HYB",
            "PVA",
        ],
        "production_unit": ["lotto_B", "lotto_C"],
    },
    "EATT_IDROFILTRO_1C": {
        "production_lines": [
            "FGC1",
            "FGC2",
            "FGC3",
            "FNL3"
        ],
        "production_unit": "lotto_C",
    },
    "EATT_IDROFILTRO_2C": {
        "production_lines": [
            "FNL3",
            "HYB",
            "PVA"
        ],
        "production_unit": "lotto_C",
    },
    "EATT_IDROFILTRO_4": {
        "production_lines": [
            "FIX2",
            "FIX3"
        ],
        "production_unit": "lotto_B",
    },
    "EATT_COMPR_VUOTO": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "DIFA107",
            "WIFA106",
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15',
            "FNL3",
            "HYB",
            "PVA",
        ],
        "production_unit": ["lotto_A", "lotto_B", "lotto_C"],
    },
    "EATT_COMPRESSORI": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "DIFA107",
            "WIFA106",
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15',
            "FNL3",
            "HYB",
            "PVA",
        ],
        "production_unit": ["lotto_A", "lotto_B", "lotto_C"],
    },
    "EATT_GLICOLE": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15'
        ],
        "production_unit": ["lotto_A", "lotto_B"],
    },
    "EATT_TRIMMER_REMOVAL_A + CVC": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "DIFA107",
            "WIFA106",
        ],
        "production_unit": "lotto_A",
    },
    "EATT_DRY_FILTRATION": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
        ],
        "production_unit": "lotto_A",
    },
    "EATT_TRIM_REMOVAL_C": {
        "production_lines": [
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15',
            "FNL3",
            "HYB",
            "PVA",
        ],
        "production_unit": ["lotto_B", "lotto_C"],
    },
    "EATT_UTENZE_TECNOLOG": {
        "production_lines": [
            "FNL8"
        ],
        "production_unit": "lotto_C",
    },
    "EATT_UTILITIES_GLOBAL_12345": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105"
        ],
        "production_unit": "lotto_A",
    },
    "EATT_QSP3_OSPREY1": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
        ],
        "production_unit": "lotto_A",
    },
    "EATT_RULLIERE_LOTTO_B": {
        "production_lines": [
            "FIX2",
            "FIX3",
            "FIX4",
            'FAX15',
            "FGC2",
            "FGC3",
            "HYB",
        ],
        "production_unit": "lotto_B",
    },
    "EATT_OSPREY3+CAMFIL3": {
        "production_lines": [
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15'
        ],
        "production_unit": "lotto_B",
    },
    "EATT_QD02C_NORMALE": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15',
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FNL3",
            "HYB",
            "PVA",
        ],
        "production_unit": ["lotto_A","lotto_B","lotto_C"],
    },
    "EATT_QD02C_PREFERENZIALE": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15',
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FNL3",
            "HYB",
            "PVA",
        ],
        "production_unit": ["lotto_A","lotto_B","lotto_C"],
    },
    "EATT_CTRL_FREDDO": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
        ],
        "production_unit": "lotto_A",
    },
    "EATT_CHILLER_1": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
        ],
        "production_unit": "lotto_A",
    },
    "EATT_CHILLER_2": {
        "production_lines": [
            "DIFA102",
            "DIFA103",
            "DIFA104",
            "DIFA105",
            "WIFA106",
        ],
        "production_unit": "lotto_A",
    },
    "Centrale_Freddo_B/C": {# This is the sum of the following utilities: EATT_SERV_CELLE_FRIGO + EATT_CHILLER_3_LOTTO_C + EATT_CHILLER_4_LOTTO_C + EATT_CHILLER_1_C
        "production_lines": [
            "FGC1",
            "FGC2",
            "FGC3",
            "FGC5",
            "FIX2",
            "FIX3",
            "FIX4",
            "FIX5",
            'FAX15',
            "FNL3",
            "HYB",
            "PVA"
        ],
        "production_unit": ["lotto_B", "lotto_C"],
    },
    "EATT_INGEGNERIA": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_QSP1": {
        "production_lines": "None",
        "production_unit": "lotto_A",
    },
    "EATT_QSP2": {
        "production_lines": "None",
        "production_unit": "lotto_A",
    },
    "EATT_LGV": {
        "production_lines": "None",
        "production_unit": "lotto_A",
    },
    "EATT_UTENZE_TECN_LOTTO_B": {
        "production_lines": "None",
        "production_unit": "lotto_B",
    },
    "EATT_QD01C_NORMALE": {
        "production_lines": "None",
        "production_unit": "lotto_C",
    },
    "EATT_QD01C_PREFERENZIALE": {
        "production_lines": "None",
        "production_unit": "lotto_C",
    },
    "EATT_CENT_TERMICA": {
        "production_lines": "None",
        "production_unit": ["lotto_A", "lotto_B", "lotto_C"],
    },
    "DELTA_Cab_B": {
        "production_lines": "None",
        "production_unit": ["lotto_A", "lotto_B"],
    },
    "Delta_Cab_A": {
        "production_lines": "None",
        "production_unit": "lotto_A",
    },
    "Delta_Cab_C": {
        "production_lines": "None",
        "production_unit": "lotto_C",
    },
    "EATT_QUADRO_ASPIRATORI": {
        "production_lines": "None",
        "production_unit": "lotto_C",
    },
    "EATT_LUCI_LOTTO_B": {
        "production_lines": "None",
        "production_unit": "lotto_B",
    },
    "EATT_CTRL_IDRICA": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_GEN_LOTTO_B": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_MAG_PROD_FINITO_1": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_MAG_PROD_FINITO_2": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_MAG_PROD_FINITO_3": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_UTA_5_LOTTO_A": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_UTA_B": {
        "production_lines": "None",
        "production_unit": "Other",
    },
    "EATT_POS_ID10_SEP_C": {
        "production_lines": "None",
        "production_unit": "Other",
    },
}

In [None]:
import sqlite3

# Put both cleaned frames into one in-memory SQLite database so they can be joined.
conn = sqlite3.connect(':memory:')
msu_cleaned.to_sql('cleaned_msu', conn, index=False, if_exists='replace')
cleaned_consumption.to_sql('cleaned_consumption', conn, index=False, if_exists='replace')

# Join production rows to the consumption meter of the same line on the same date.
# substr(consumption_unit, 6) drops the first five characters (the 'EATT_'
# prefix on most meters), leaving e.g. 'FNL_3'. production_line had every
# non-alphanumeric character stripped during cleaning ('FNL3'), so the
# underscores must be removed on this side too - without that the join
# never matched and returned no rows.
query = """
SELECT *
FROM cleaned_msu
join cleaned_consumption
on cleaned_msu.date = cleaned_consumption.date
  and cleaned_msu.production_line = replace(substr(cleaned_consumption.consumption_unit, 6), '_', '')
limit 10
"""

# Diagnostic queries for comparing the join keys on both sides:
# query = """
# SELECT replace(substr(consumption_unit,6), '_', '') as x, count(*)
# FROM cleaned_consumption
# group by 1
# order by 1
# """

# query = """
# SELECT production_line, count(*)
# FROM cleaned_msu
# group by 1
# order by 1
# """

result = pd.read_sql_query(query, conn)
pd.set_option('display.max_rows', 100)
result.head(100)

Unnamed: 0,production_line,date,production_time_in_minutes,total_products_produced,date.1,consumption_unit,consumption_kwh


In [None]:
# modeling:
# we have the total products produced for every production line.
# using time series modeling, we can predict the total production for the next 12 months.
# then:
# - for production lines: we can calculate the energy consumption per production unit (either as a constant value, from dividing the
#   total energy consumption by the total production, or as a time series) and then multiply this by the production trend, to give us the
#   energy consumption trend.
# - for utilities:
#   - for utilities that are not related to the production values, we can use time series to predict their consumption trend from the
#     historical trend.
#   - for utilities that are only used when production lines are working: for every production line we have the predicted production values.
#     using historical data, we can calculate the time needed to produce one unit of product (either as a constant or as a time series). then we can
#     calculate the energy consumption of each utility per unit of working time of each production line. by multiplying these numbers, we get
#     the energy consumption of each utility unit, based on the working time of each production line.