# ETL Pipeline for Pterodactyl Game Servers Logs

### Index

- Install requirements
- Import libraries and setup key variables
- Setup directories, functions and folder creation
- Extract logs from each active Minecraft Server
- Transformation from logs data into information
- Load processed data into Data Warehouse (Postgres)

## Install requirements

In [None]:
pip install -r requirements.txt

## Import libraries and setup key variables
Remember to add your own credentials to the .env file so that they can be loaded here.

In [None]:
import urllib.request, zipfile, requests, gzip, csv, os, re
from sqlalchemy import create_engine, text
from pydactyl import PterodactylClient
from dotenv import load_dotenv
import pandas as pd

# Percent-encoding helper for the database credentials below
from urllib.parse import quote

# Load .env file credentials
load_dotenv()

# Database connection settings (os.getenv returns None for an unset variable)
host = os.getenv('POSTGRES_HOST')
port = os.getenv('POSTGRES_PORT')
database = os.getenv('POSTGRES_DATABASE')
username = os.getenv('POSTGRES_USERNAME')
password = os.getenv('POSTGRES_PASSWORD')
# Username and password are percent-encoded so that reserved URL characters
# (for example '@', ':', '/' or '%') cannot corrupt the connection URL.
# safe='' also encodes '/', and str() keeps the previous rendering of an
# unset variable ('None') instead of raising here.
connection = (
    f"postgresql://{quote(str(username), safe='')}:{quote(str(password), safe='')}"
    f"@{host}:{port}/{database}"
)

# Pterodactyl connection settings
pterodactyl_url = os.getenv('PTERODACTYL_URL')
application_api_key = os.getenv('PTERODACTYL_APP_KEY')
client_api_key = os.getenv('PTERODACTYL_CLI_KEY')

# Connect to Pterodactyl Application API
api_app = PterodactylClient(pterodactyl_url, application_api_key, debug=False)
# Connect to Pterodactyl Client API
api_cli = PterodactylClient(pterodactyl_url, client_api_key, debug=False)

## Setup directories, functions and folder creation

In [None]:
# Schema names used in the database
schema = 'pterodactyl'
minecraft_schema = 'minecraft'

# Egg names, grouped per game and then combined
eggs_minecraft = (
    'Vanilla Minecraft',
    'Forge Minecraft',
    'Paper',
    'Spigot',
    'Mohist',
)
eggs_ark = ('Ark: Survival Evolved',)
all_eggs = (*eggs_minecraft, *eggs_ark)

# Directories: the notebook uses the current working directory
# (for .py files use os.path.dirname(os.path.realpath(__file__)) instead)
pwd = os.getcwd()
raw_logs_folder = os.path.join(pwd, 'raw_logs')

# Helper functions provided by the local functions module
from functions import (
    mkdir,
    sort_list_logs,
    last_index,
    extract_compressed_file,
    download_logs,
    transform_logs,
)

# Create the raw logs folder if it does not exist
mkdir(raw_logs_folder)

## Extract logs from each active Minecraft Server

In [None]:
# Get server information from database: one row per active server with its
# egg name and, when available, the date of its last loaded log.
engine = create_engine(connection)
with engine.connect() as conn:
    list_servers = conn.execute(text(
        f'SELECT servers.id, servers.identifier, eggs.name AS egg, last_log_date.last_date '
        f'FROM {schema}.servers '
        f'JOIN {schema}.eggs ON eggs.id = servers.egg_id '
        f'LEFT JOIN {minecraft_schema}.last_log_date ON last_log_date.server_identifier = servers.identifier '
        'WHERE servers.is_active = true'
    ))

    # Save server information from the query into a dataframe.
    # This is done inside the `with` block so the rows are read while the
    # connection is still open, instead of relying on the driver having
    # buffered them before the connection was released.
    df_servers = pd.DataFrame(list_servers.fetchall(), columns=list_servers.keys())

# Download the latest logs based on last_log and egg into the staging folder
for _index, server in df_servers.iterrows():
    download_logs(
        id=server['id'],
        identifier=server['identifier'],
        egg=server['egg'],
        # LEFT JOIN: last_date is null for servers without an entry in last_log_date
        last_log=server['last_date'],
        staging_folder=raw_logs_folder,
    )

## Transformation from logs data into information

In [None]:
# Empty dataframe that accumulates the transformed logs of every server
df_all_logs = pd.DataFrame(columns=['server_id', 'date', 'time', 'information', 'user', 'activity'])

# Each entry of the raw logs folder is handled as one server folder
for folder in os.listdir(raw_logs_folder):
    folder_server_dir = os.path.join(raw_logs_folder, folder)

    # Transform the raw logs stored in this server's folder
    df_logs = transform_logs(id=folder, path_dir=folder_server_dir, df_servers=df_servers)
    # Save every log in df_all_logs for each iteration
    df_all_logs = pd.concat([df_all_logs, df_logs], ignore_index=True)

    # Delete the .log files of this server only, now that they have been
    # transformed. Cleaning every server folder here would remove the logs of
    # the servers that have not been processed yet.
    for log in os.listdir(folder_server_dir):
        if log.endswith('.log'):
            os.remove(os.path.join(folder_server_dir, log))

## Load processed data into Data Warehouse (Postgres)

In [None]:
# Destination table for the processed logs
table = 'activity'

# Open a database connection and append all new logs to the table
engine = create_engine(connection)
with engine.connect() as conn:
    # Explicit transaction so that a failed upload can be rolled back
    trans = conn.begin()

    try:
        # Append all new activity rows to postgres, then commit them
        df_all_logs.to_sql(
            table,
            conn,
            schema=schema,
            if_exists='append',
            index=False,
        )
        trans.commit()
    except Exception as error:
        # Report the failure, roll the transaction back and re-raise
        print('!!! [ERROR IN DATABASE QUERIES] !!!')
        trans.rollback()
        print('Transaction has been rolled back')
        print(f'Error occurred during transaction:\n{error}')
        raise