In [1]:
import paramiko
import os
import re
import requests
import json
import time

In [2]:
def run_in_master(command):
    """Run a shell command on the master node over SSH and return its output.

    Connects to host ``namenode`` as ``root``, then executes ``command`` in a
    shell that has first changed into ``/app/`` and sourced
    ``/env_var_path.sh``.

    Args:
        command: Shell command line appended after the environment setup.

    Returns:
        A ``(stdout_lines, stderr_lines)`` tuple, each element being the list
        produced by ``readlines()`` on the corresponding remote stream.
    """
    ssh = paramiko.SSHClient()
    # NOTE(review): unknown host keys are accepted automatically and the
    # credentials below are hardcoded; consider moving them to configuration.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect("namenode", username="root", password="pass")
        _ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command(
            f"cd /app/ && . /env_var_path.sh && {command}"
        )
        # Both streams are read completely before the connection is released.
        return (ssh_stdout.readlines(), ssh_stderr.readlines())
    finally:
        # Always release the SSH connection, even when connect/exec fails;
        # previously every call left a client open.
        ssh.close()

def merge_results(path):
    """Concatenate every ``part-*`` file under ``path`` into ``merged.txt``.

    The concatenation is done on the master node by piping ``hdfs dfs -cat``
    into ``hdfs dfs -put``; the output of the remote command is discarded.

    Args:
        path: HDFS directory that holds the ``part-*`` files.
    """
    merge_command = f"hdfs dfs -cat {path}/part-* | hdfs dfs -put - {path}/merged.txt"
    run_in_master(merge_command)
    
def get_data_from_output_path(path):
    """Return the location of the merged result file inside ``path``.

    Args:
        path: Output directory of a job.

    Returns:
        ``path`` followed by ``/merged.txt``.
    """
    merged_file = f"{path}/merged.txt"
    return merged_file

def print_hdfs_output(path, max_lines: int = 1000):
    """Print the beginning of the merged result file stored under ``path``.

    The file is read on the master node with ``hdfs dfs -cat`` and only the
    first ``max_lines`` lines are printed.

    Args:
        path: Output directory whose merged file should be shown.
        max_lines: Maximum number of lines to print (defaults to 1000).
    """
    raw = run_in_master(f"hdfs dfs -cat {get_data_from_output_path(path)}")[0]
    # Each element from readlines() still carries its own line terminator, so
    # strip it before joining; joining the raw lines with "\n" printed an
    # empty line after every row.
    print("\n".join(line.rstrip("\r\n") for line in raw[:max_lines]))
    
def get_time(res, max_attempts: int = 4):
    """Look up the total duration of a submitted Spark application.

    The application id is taken from the ``tracking URL`` line found in the
    second element of ``res`` (the callers pass the ``(stdout, stderr)`` tuple
    returned by ``run_in_master`` for a ``spark-submit`` command). The history
    server at ``namenode:18080`` is then polled until every attempt of the
    application is reported as completed.

    Args:
        res: Two-element sequence whose second item is an iterable of text
            lines to search for the tracking URL.
        max_attempts: Maximum number of history-server requests before giving
            up.

    Returns:
        The sum of the ``duration`` fields of all attempts, or ``-1`` when the
        application id cannot be found, the history server answers with an
        error status, or the attempts are still incomplete after
        ``max_attempts`` polls.
    """
    def find_application_id(result):
        # Return the first non-empty id captured from a tracking URL line,
        # or None when no line contains one.
        for line in result[1]:
            match = re.search('tracking URL: http://resourcemanager:8088/proxy/(.*)/', line)
            if match is not None and match.group(1) != '':
                return match.group(1)
        return None

    def total_duration(data):
        # Sum the durations of all attempts; -1 signals that at least one
        # attempt is not marked as completed yet.
        total = 0
        for attempt_info in data['attempts']:
            if attempt_info['completed'] is False:
                print('spark history server has not updated (yet)')
                return -1
            total = total + attempt_info['duration']
        return total

    app_id = find_application_id(res)
    if app_id is None:
        # The lookup used to return '' while this guard compared against -1,
        # so a missing id was never detected and an id-less URL was queried.
        print('WARNING: could not find the application id in the spark-submit output')
        return -1

    for attempt in range(1, max_attempts + 1):
        response = requests.get(f'http://namenode:18080/api/v1/applications/{app_id}')
        if not response.ok:
            print('WARNING: application error')
            return -1

        duration = total_duration(json.loads(response.text))
        if duration >= 0:
            return duration

        # Wait before polling again, but not after the final attempt.
        if attempt < max_attempts:
            time.sleep(3)

    print('WARNING: maximum attempts exceeded')
    return -1

# Covid

## SQL

In [3]:
# Covid / SQL: remove the previous output directory, submit the job, merge the
# part files and show the measured duration together with the result.
output_path = "/spark-result/covid/sql"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/covid_sql.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

Took 16056ms
date,location,total_cases,new_cases,total_deaths,new_deaths,new_cases_per_million,average_new_cases_per_million

2022-05-03,Curacao,42035,0,273,0,0,160.00584439903545

2022-05-04,Curacao,42330,295,274,1,1543,160.00584439903545

2022-05-05,Curacao,42330,0,274,0,0,160.00584439903545

2022-05-06,Curacao,42330,0,274,0,0,160.00584439903545

2022-05-07,Curacao,42330,0,274,0,0,160.00584439903545

2022-05-08,Curacao,42330,0,274,0,0,160.00584439903545

2022-05-09,Curacao,42330,0,274,0,0,160.00584439903545

2022-05-10,Curacao,42330,0,274,0,0,160.00584439903545

2022-05-11,Curacao,42674,344,275,1,1799,160.00584439903545

2022-05-12,Curacao,42674,0,275,0,0,160.00584439903545

2022-05-13,Curacao,42674,0,275,0,0,160.00584439903545

2022-05-14,Curacao,42674,0,275,0,0,160.00584439903545

2022-05-15,Curacao,42674,0,275,0,0,160.00584439903545

2022-05-16,Curacao,42674,0,275,0,0,160.00584439903545

2022-05-17,Curacao,42674,0,275,0,0,160.00584439903545

2022-05-18,Curacao,43149,475,276,1,2484,

## DataFrame

In [4]:
# Covid / DataFrame: remove the previous output directory, submit the job,
# merge the part files and show the measured duration together with the result.
output_path = "/spark-result/covid/df"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/covid_df.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

Took 14406ms
date,location,total_cases,new_cases,total_deaths,new_deaths,new_cases_per_million,average_new_cases_per_million

2022-05-03,Curacao,42035,0,273,0,0,164.986180900309

2022-05-04,Curacao,42330,295,274,1,1543,164.986180900309

2022-05-05,Curacao,42330,0,274,0,0,164.986180900309

2022-05-06,Curacao,42330,0,274,0,0,164.986180900309

2022-05-07,Curacao,42330,0,274,0,0,164.986180900309

2022-05-08,Curacao,42330,0,274,0,0,164.986180900309

2022-05-09,Curacao,42330,0,274,0,0,164.986180900309

2022-05-10,Curacao,42330,0,274,0,0,164.986180900309

2022-05-11,Curacao,42674,344,275,1,1799,164.986180900309

2022-05-12,Curacao,42674,0,275,0,0,164.986180900309

2022-05-13,Curacao,42674,0,275,0,0,164.986180900309

2022-05-14,Curacao,42674,0,275,0,0,164.986180900309

2022-05-15,Curacao,42674,0,275,0,0,164.986180900309

2022-05-16,Curacao,42674,0,275,0,0,164.986180900309

2022-05-17,Curacao,42674,0,275,0,0,164.986180900309

2022-05-18,Curacao,43149,475,276,1,2484,164.986180900309

2022-05-19,C

# Steam

## SQL

In [13]:
# Steam / SQL: remove the previous output directory, submit the job, merge the
# part files and show the measured duration together with the result.
output_path = "/spark-result/steam/sql"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/steam_sql.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

spark history server has not updated (yet)
Took 14868ms
steam_appid,coming_soon,date,appid,name,positive,negative,owners,ccu

907680,false,"Aug 17, 2018",907680,Wwbit,710,566,"100,000 .. 200,000",0

823550,false,"Sep 18, 2018",823550,Booty Calls,255,294,"100,000 .. 200,000",134

639780,false,"Dec 7, 2017",639780,Deep Space Waifu: FLAT JUSTICE,1449,67,"50,000 .. 100,000",4

steam_appid,coming_soon,date,appid,name,positive,negative,owners,ccu

896890,false,"Dec 23, 2019",896890,VR Paradise - Steam Edition,138,50,"20,000 .. 50,000",11

726360,false,"May 22, 2020",726360,BOOBS SAGA: Prepare To Hentai Edition,367,103,"20,000 .. 50,000",0

723090,false,"Oct 24, 2017",723090,Meltys Quest,446,10,"20,000 .. 50,000",36

712790,false,"Oct 2, 2017",712790,Crimson Memories,51,21,"20,000 .. 50,000",0

825300,false,"Nov 22, 2018",825300,To Trust an Incubus,43,5,"0 .. 20,000",1

937730,false,"Dec 24, 2018",937730,Lady's Hentai Mosaic,21,4,"0 .. 20,000",0

588920,false,"Apr 7, 2017",588920,BADASS,15,14

## DataFrame

In [7]:
# Steam / DataFrame: remove the previous output directory, submit the job,
# merge the part files and show the measured duration together with the result.
output_path = "/spark-result/steam/df"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/steam_df.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

spark history server has not updated (yet)
spark history server has not updated (yet)
spark history server has not updated (yet)
Took 16815ms
steam_appid,coming_soon,date,appid,name,positive,negative,owners,ccu

907680,false,"Aug 17, 2018",907680,Wwbit,710,566,"100,000 .. 200,000",0

823550,false,"Sep 18, 2018",823550,Booty Calls,255,294,"100,000 .. 200,000",134

639780,false,"Dec 7, 2017",639780,Deep Space Waifu: FLAT JUSTICE,1449,67,"50,000 .. 100,000",4

steam_appid,coming_soon,date,appid,name,positive,negative,owners,ccu

896890,false,"Dec 23, 2019",896890,VR Paradise - Steam Edition,138,50,"20,000 .. 50,000",11

726360,false,"May 22, 2020",726360,BOOBS SAGA: Prepare To Hentai Edition,367,103,"20,000 .. 50,000",0

723090,false,"Oct 24, 2017",723090,Meltys Quest,446,10,"20,000 .. 50,000",36

712790,false,"Oct 2, 2017",712790,Crimson Memories,51,21,"20,000 .. 50,000",0

825300,false,"Nov 22, 2018",825300,To Trust an Incubus,43,5,"0 .. 20,000",1

937730,false,"Dec 24, 2018",937730,Lad

In [3]:
# Steam / steam1.py: remove the previous output directory, submit the job,
# merge the part files and show the measured duration together with the result.
output_path = "/spark-result/steam/sql1"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/steam1.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

Took 14526ms
appid,name,positive,negative,owners,ccu

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,,,,,

,

In [4]:
# Steam / steam2.py: remove the previous output directory, submit the job,
# merge the part files and show the measured duration together with the result.
output_path = "/spark-result/steam/sql2"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/steam2.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

Took 14539ms
appid

22490

790650

907680

823550

639780

896890

726360

723090

712790

681820

681840

868980

681830

560000

958480

966460

603120

962380

914140

929310

906050

502300

825300

937730

588920

555310

920460

appid

891870

1004650

912190

935560

592750

975020

1014730

914850

929300

929290

946800

677730

965810

1013180

961640

928060



In [5]:
# Steam / steam3.py: remove the previous output directory, submit the job,
# merge the part files and show the measured duration together with the result.
output_path = "/spark-result/steam/sql3"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/steam3.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

Took 13452ms
name

Fallout: New Vegas

MARVEL END TIME ARENA

Wwbit

Booty Calls

Deep Space Waifu: FLAT JUSTICE

VR Paradise - Steam Edition

BOOBS SAGA: Prepare To Hentai Edition

Meltys Quest

Crimson Memories

(Chinese PaladinSword and Fairy 4)

Chinese PaladinSword and Fairy 5 Prequel

DEEP SPACE WAIFU: NEKOMIMI

(Chinese PaladinSword and Fairy 5)

Ladykiller in a Bind

Seed of the Dead

Undress Tournament

Happy Campers

HOT FIT!

Hentai Dojo

Kamasutra Connect : Sexy Hentai Girls

Hentai Case Opening

Heartomics: Lost Count

To Trust an Incubus

Lady's Hentai Mosaic

BADASS

Satellite

Hentai Space

name

King of Phoenix

Unlock Me

Hentai IQ Puzzle

Hentai Strip Shot

SPACE-FRIGHT

The Spirit Master of Retarnia -Conqueror of the Labyrinth-

Cyndy

HENTAI PUZZLE

Chroma : Sexy Hentai Girls

Strip Breaker : Hentai Girls

Hentai 2+2=4

Karmasutra

Kara no Shojo

Funbag Fantasy

The Tower of Five Hearts

Hentai Lady



In [8]:
# Steam / steam4.py: remove the previous output directory, submit the job,
# merge the part files and show the measured duration together with the result.
output_path = "/spark-result/steam/sql4"
run_in_master(f"hdfs dfs -rm -r {output_path}")
spark_result = run_in_master("spark-submit --master yarn --deploy-mode cluster /data/master_volume/spark_scripts/steam4.py")
merge_results(output_path)
print(f"Took {get_time(spark_result)}ms")
print_hdfs_output(output_path)

spark history server has not updated (yet)
spark history server has not updated (yet)
spark history server has not updated (yet)
Took 13297ms
appid,nam

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,

,