# ETL Pipeline Pterodactyl Resource Consumption

### Index

- Install requirements
- Import libraries and setup key variables
- Define functions
- Get Pterodactyl Utilization Information
- Load data into the Postgres database

## Install requirements

In [None]:
pip install -r requirements.txt

## Import libraries and setup key variables
Remember to add your own credentials to the .env file so that they can be loaded here.

In [1]:
import os
import time
from datetime import datetime
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from pydactyl import PterodactylClient
from sqlalchemy import create_engine, text

# Load the credentials stored in the .env file into the process environment
load_dotenv()

# Database connection
host = os.getenv('POSTGRES_HOST')
port = os.getenv('POSTGRES_PORT')
database = os.getenv('POSTGRES_DATABASE')
username = os.getenv('POSTGRES_USERNAME')
password = os.getenv('POSTGRES_PASSWORD')
# Percent-encode the credentials: reserved URL characters such as '@', ':', '/'
# or '%' in the username or password would otherwise corrupt the connection URL.
# Store the raw (unencoded) values in .env. str() keeps an unset variable
# rendering as 'None', exactly as the plain f-string did before.
encoded_username = quote(str(username), safe='')
encoded_password = quote(str(password), safe='')
connection = f'postgresql://{encoded_username}:{encoded_password}@{host}:{port}/{database}'

# Pterodactyl connection
pterodactyl_url = os.getenv('PTERODACTYL_URL')
application_api_key = os.getenv('PTERODACTYL_APP_KEY')
client_api_key = os.getenv('PTERODACTYL_CLI_KEY')

# Connect to the Pterodactyl Application API
api_app = PterodactylClient(pterodactyl_url, application_api_key, debug=False)
# Connect to the Pterodactyl Client API
api_cli = PterodactylClient(pterodactyl_url, client_api_key, debug=False)

## Define functions

In [2]:
# Function to transform from bytes to megabytes
from functions import bytes_to_megabytes

## Get Pterodactyl Utilization Information
About: current state, memory bytes, cpu absolute, disk bytes, network rx/tx bytes, uptime

In [3]:
# Setting variables
SCHEMA = 'pterodactyl'
WINDOW_EXTRACTION_TIME = 30  # time window during which data is collected from the servers [seconds]
BREAK_TIME = 10  # pause after one full pass over all servers [seconds]
WAITING_TIME = 1  # pause after polling each individual server [seconds]

# Read the uuid of every active server from Postgres
engine = create_engine(connection)
with engine.connect() as conn:
    list_of_uuid = conn.execute(text(f'SELECT servers.uuid FROM {SCHEMA}.servers  WHERE servers.is_active = true'))
    result = list_of_uuid.fetchall()
    list_servers = [uuid for uuid, in result]  # unpack the one-element rows into plain uuid values

# Poll the utilization of every server repeatedly until the extraction window has elapsed
all_utilizations = []
start_time = time.time()
while (time.time() - start_time) < WINDOW_EXTRACTION_TIME:
    print(time.time() - start_time)
    for server in list_servers:
        try:
            consumption = api_cli.client.servers.get_server_utilization(server)
            consumption['identifier'] = server[:8]  # short identifier: first 8 characters of the uuid
            all_utilizations.append(consumption)
        except Exception as error:
            # Best effort: one failing server must not abort the whole extraction,
            # but the failure is reported instead of being silently discarded.
            # Catching Exception (not a bare except) lets KeyboardInterrupt stop the cell.
            print(f'Could not get utilization for server {server}: {error}')
        time.sleep(WAITING_TIME)
    time.sleep(BREAK_TIME)
print(time.time() - start_time)

# Without any sample the DataFrame below would have no columns and fail with an
# opaque KeyError, so stop here with an explicit message instead.
if not all_utilizations:
    raise RuntimeError('No utilization data was collected: no active servers were found or every request failed')

# Create the dataframe and extract data from resources
df_consumptions = pd.DataFrame(all_utilizations)
# True only while the server is running; any other state (offline, or a
# transitional state) becomes False so the column stays purely boolean.
df_consumptions['status'] = df_consumptions['current_state'] == 'running'
df_consumptions['cpu'] = df_consumptions['resources'].apply(lambda x: x.get('cpu_absolute', None))
df_consumptions['ram'] = df_consumptions['resources'].apply(lambda x: bytes_to_megabytes(x.get('memory_bytes', None)))
df_consumptions['disk'] = df_consumptions['resources'].apply(lambda x: bytes_to_megabytes(x.get('disk_bytes', None)))
# A single timestamp, taken after the extraction finished, is shared by all rows
df_consumptions['capture_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

df_consumptions = df_consumptions[['identifier', 'status', 'ram', 'cpu', 'disk', 'capture_time']]

# Group by 'identifier' and calculate mean, std, min, and max for each group
# (std is NaN for a server that was sampled only once within the window)
final_utilization_df = df_consumptions.groupby('identifier').agg({
    'status': ['first'],  # keep the first observed status of each server
    'ram': ['mean', 'std', 'min', 'max'],
    'cpu': ['mean', 'std', 'min', 'max'],
    'disk': ['mean', 'std', 'min', 'max'],
    'capture_time': ['first'],
}).reset_index()

# Flatten the two-level column names into '<column>_<aggregation>'
final_utilization_df.columns = ['_'.join(col).strip() for col in final_utilization_df.columns.values]

# Rename the columns whose flattened name carries a redundant suffix
final_utilization_df = final_utilization_df.rename(columns={
    'identifier_': 'server_identifier',
    'status_first': 'status',
    'capture_time_first': 'capture_time',
})
print(final_utilization_df)

0.0020062923431396484
39.03044056892395
   server_identifier  status     ram_mean  ram_std      ram_min      ram_max  \
0           1a76c940    True  1697.136719      NaN  1697.136719  1697.136719   
1           1d3ffc65   False     0.000000      NaN     0.000000     0.000000   
2           3299f0f0   False     0.000000      NaN     0.000000     0.000000   
3           453f605d   False     0.000000      NaN     0.000000     0.000000   
4           45f311a5   False     0.000000      NaN     0.000000     0.000000   
5           4f125f52   False     0.000000      NaN     0.000000     0.000000   
6           7cc366dc   False     0.000000      NaN     0.000000     0.000000   
7           9c032ee0    True  1634.957031      NaN  1634.957031  1634.957031   
8           9e053c46   False     0.000000      NaN     0.000000     0.000000   
9           b1109534   False     0.000000      NaN     0.000000     0.000000   
10          cce22b85   False     0.000000      NaN     0.000000     0.000000   


## Load data into Data Warehouse (Postgres)

In [6]:
# Destination schema and table in Postgres
SCHEMA = 'pterodactyl'
TABLE = 'utilization'

# Append the aggregated utilization rows to the table inside one explicit
# transaction, so a failed load leaves the table unchanged
engine = create_engine(connection)
with engine.connect() as conn:
    transaction = conn.begin()
    try:
        # Insert the new rows; the commit stays inside the try so that a
        # failing commit is rolled back and reported as well
        final_utilization_df.to_sql(TABLE, conn, schema=SCHEMA, if_exists='append', index=False)
        transaction.commit()
    except Exception as error:
        # Undo the partial load, report what happened and re-raise the error
        print('!!! [ERROR IN DATABASE QUERIES] !!!')
        transaction.rollback()
        print('Transaction has been rolled back')
        print(f'Error occurred during transaction:\n{error}')
        raise