# Minecraft Stats

This notebook summarize minecraft stats files across all worlds saved on the local machine. PathLib is used - configure pathlib to work with your operating system.

The first code cell (immediately below) builds the 

In [72]:
import pandas as pd
import numpy as np
import re
from pathlib import Path
import gzip
import hvplot.pandas
from bokeh.resources import INLINE
import bokeh
import panel as pn
import holoviews as hv
from nbtlib import File

pn.extension(sizing_mode = 'stretch_width')
hv.extension("bokeh")


local_user = 'culley'
# OSX path
path = Path('/Users/{user}/Library/Application Support/minecraft/saves/'.format(user=local_user))
stats_files = list(path.rglob("*/stats/*.json"))
nbtfiles = list(path.rglob("*/level.dat"))


def minecraft_key(key_name):
    return 'minecraft:{key}'.format(key=key_name)

def merge_data_frames(df, df2, df_name):
    return pd.merge(
        df,
        pd.DataFrame({'minecraft_key':df2.keys(), df_name: df2.values()}),
        how="left",
        left_on="minecraft_key",
        right_on="minecraft_key"
    )

def world_stats(path):
    df = pd.read_json(path)
    stats = df['stats']
    broken = stats[minecraft_key('broken')] if minecraft_key('broken') in stats else {}
    crafted = stats[minecraft_key('crafted')] if minecraft_key('crafted') in stats else {}
    custom = stats[minecraft_key('custom')] if minecraft_key('custom') in stats else {}
    dropped = stats[minecraft_key('dropped')] if minecraft_key('dropped') in stats else {}
    killed = stats[minecraft_key('killed')] if minecraft_key('killed') in stats else {}
    mined = stats[minecraft_key('mined')] if minecraft_key('mined') in stats else {}
    picked_up = stats[minecraft_key('picked_up')] if minecraft_key('picked_up') in stats else {}
    used = stats[minecraft_key('used')] if minecraft_key('used') in stats else {}

    df = pd.DataFrame()
    # create unique list of keys from all dictionaries
    df['minecraft_key'] = list(set(broken.keys()) | 
                          set(crafted.keys()) | 
                          set(custom.keys()) | 
                          set(dropped.keys()) | 
                          set(killed.keys()) | 
                          set(mined.keys()) | 
                          set(picked_up.keys()) | 
                          set(used.keys()))
    df = merge_data_frames(df, broken, 'broken')
    df = merge_data_frames(df, crafted, 'crafted')
    df = merge_data_frames(df, custom, 'custom')
    df = merge_data_frames(df, dropped, 'dropped')
    df = merge_data_frames(df, killed, 'killed')
    df = merge_data_frames(df, mined, 'mined')
    df = merge_data_frames(df, picked_up, 'picked_up')
    df = merge_data_frames(df, used, 'used')
    # remove minecraft: prefix
    df['minecraft_key'] =  [re.sub(r'minecraft:','', str(x)) for x in df['minecraft_key']]
    df['world_name'] = path.parts[7]
    df['wood_type'] = df['minecraft_key'].str.extract(r'(dark_oak|birch|oak|acacia|spruce|jungle|mangrove)')
    df = df.fillna(0)
    df = df.astype({'broken': 'int','broken': 'int','crafted': 'int','custom': 'int','dropped': 'int','killed': 'int','mined': 'int','picked_up': 'int','used': 'int'})
    return df

minecraft_stats = pd.DataFrame()
parse_errors = pd.DataFrame(columns=['world_name', 'file_name'])
for file_name in stats_files:
    try:
        minecraft_stats = pd.concat([minecraft_stats, world_stats(file_name)])
    except:
        parse_errors.loc[len(parse_errors.index)] = [file_name.parts[7], file_name.name]

# not currently shown on report:
parse_error_summary = parse_errors.groupby('world_name').count()



nbtpath = Path('/Users/{user}/Library/Application Support/minecraft/saves/'.format(user=local_user))
nbtfiles = list(nbtpath.rglob("*/level.dat"))
world_types = pd.DataFrame(columns = ['world_name', 'hardcore', 'game_type', 'play_time'])

for nbtfile_name in nbtfiles:
    level_data = File.load(nbtfile_name, gzipped=True)
    hardcore = level_data['Data']['hardcore']
    game_type = level_data['Data']['GameType']
    #level_name = level_data['Data']['LevelName']
    # useing folder name in order to join with other datasets
    level_name = nbtfile_name.parts[-2:-1][0]
    play_time = level_data['Data']['Time'] * .05 # convert ticks to seconds
    world_types.loc[len(world_types.index)] = [level_name, hardcore, game_type, play_time]


minecraft_stats = pd.merge(
    minecraft_stats,
    world_types,
    how="left",
    left_on="world_name",
    right_on="world_name"
    )

# Define the conditions and corresponding values for the new column
conditions = [
    (minecraft_stats['hardcore'] == 1),
    (minecraft_stats['game_type'] == 1),
    (minecraft_stats['hardcore'] == 0)
]

game_modes = ['hardcore', 'creative', 'survival']
minecraft_stats['game_mode'] = np.select(conditions, game_modes, default='Unknown')
minecraft_stats = minecraft_stats.drop('hardcore', axis=1)
minecraft_stats = minecraft_stats.drop('game_type', axis=1)
minecraft_stats.head()



Unnamed: 0,minecraft_key,broken,crafted,custom,dropped,killed,mined,picked_up,used,world_name,wood_type,play_time,game_mode
0,diamond_sword,0,4,0,0,0,0,0,301,Biotech,0,45890.25,hardcore
1,red_dye,0,1,0,0,0,0,0,0,Biotech,0,45890.25,hardcore
2,diamond_leggings,0,1,0,0,0,0,0,0,Biotech,0,45890.25,hardcore
3,redstone,0,0,0,0,0,0,161,3,Biotech,0,45890.25,hardcore
4,tropical_fish,0,0,0,0,0,0,4,0,Biotech,0,45890.25,hardcore


In [84]:
game_summary_plot_data = minecraft_stats.groupby(['world_name', 'game_mode', 'play_time'], as_index=True).agg({
    'broken': 'sum', 
    'used': 'sum', 
    'crafted': 'sum', 
    'mined': 'sum', 
    'custom': 'sum', 
    'picked_up': 'sum', 
    'killed': 'sum', 
    'dropped': 'sum'})


game_summary_plot = game_summary_plot_data.hvplot.scatter(
    x='play_time', 
    y='minded', 
    title='Play Time by Mining Stats',
    hover_cols=['world_name'],
    by='game_mode',
    ylabel='Blocks Mined',
    xlabel='Play Time in Seconds',
    legend='top',
    height=500, 
    width=1000
)

game_summary_plot

In [81]:
woods = minecraft_stats.groupby('wood_type').sum()
woods['id'] = woods.index
woods = woods.drop(woods.index[0])
woods_long_format = pd.melt(woods, id_vars=['id'], value_vars=['crafted', 'mined', 'picked_up', 'used'])
woods_long_format['variable'] = woods_long_format['variable'].replace('picked_up', 'picked up')
woods_long_format['id'] = woods_long_format['id'].replace('dark_oak', 'dark oak')
wood_type_plot = woods_long_format.hvplot.bar('id', 'value', 
                             by='variable', 
                             legend='top_left', 
                             height=500, 
                             width=1000,
                             ylabel='Total',
                             xlabel='Wood Type Statistics',
                             rot=60, 
                             cmap='Category20',
                             title='Wood Type Summary for {cnt} Saved Worlds'.format(cnt=minecraft_stats['world_name'].nunique()))


#wood_type_plot



  woods = minecraft_stats.groupby('wood_type').sum()


In [44]:
path = Path('/Users/{user}/Library/Application Support/minecraft/logs/'.format(user=local_user))
files = list(path.rglob("*.log.gz"))
minecraft_log = pd.DataFrame( columns=['log_date', 'log_source', 'log_entry'])
minecraft_log['log_date'] = pd.to_datetime(minecraft_log['log_date'])
for file in files: 
    log_file_parts = re.search(r".*(\d{4}-\d{2}-\d{2}).*", file.name)
    if log_file_parts is not None:
        log_date = log_file_parts.group(1)
        try:
            f = gzip.open('/Users/{}/Library/Application Support/minecraft/logs/{}-2.log.gz'.format(local_user, log_date),'rb')
            for line in f:
                if line.decode()[0] == '[':
                    log_parts = re.search(r"\[(\d\d:\d\d:\d\d)\] \[([^\[]*)\]: (.+)", line.decode())
                    #print(log_parts.group(3))
                    l = log_parts.group(3)
                    
                    try:
                        if re.match(r'.*joined the game$', log_parts.group(3)):
                            ts = pd.Timestamp("{} {}".format(log_date, log_parts.group(1)))
                            minecraft_log.loc[len(minecraft_log.index)] = [ts, log_parts.group(2), log_parts.group(3)]
                            f.close()
                    except:
                        f.close()
        except Exception as e: 
            pass

minecraft_log['user_name'] = [re.sub(r' joined the game$','', str(x)) for x in minecraft_log['log_entry']]
minecraft_log['user_name'] = minecraft_log['user_name'].replace('.*\[CHAT\] ', '', regex=True)
minecraft_log['user_name'] = minecraft_log['user_name'].replace(' \(.*', '', regex=True)
minecraft_log['dates'] = minecraft_log['log_date'].dt.date
minecraft_log['times'] = minecraft_log['log_date'].dt.time
minecraft_log = minecraft_log.rename(columns={"user_name": "User Name"})


In [45]:
log_in_times_plot = minecraft_log.hvplot.scatter(
    x='dates', 
    y='times', 
    title='Minecraft Sessions',
    hover_cols=['log_date'],
    by='User Name',
    xlabel='Log In Date',
    ylabel='Log In Time',
    legend='top',
    height=500, 
    width=1000
)
#log_in_times_plot

In [55]:
from nbtlib import File

path = Path('/Users/{user}/Library/Application Support/minecraft/saves/'.format(user=local_user))
nbtfiles = list(path.rglob("*/level.dat"))
world_types = pd.DataFrame(columns = ['world_name', 'hardcore', 'game_type', 'play_time'])

for nbtfile_name in nbtfiles:
    level_data = File.load(nbtfile_name, gzipped=True)
    hardcore = level_data['Data']['hardcore']
    game_type = level_data['Data']['GameType']
    #level_name = level_data['Data']['LevelName']
    # useing folder name in order to join with other datasets
    level_name = nbtfile_name.parts[-2:-1][0]
    play_time = level_data['Data']['Time'] * .05 # convert ticks to seconds
    world_types.loc[len(world_types.index)] = [level_name, hardcore, game_type, play_time]


In [56]:
#bootstrap = pn.template.BootstrapTemplate(title='Minecraft Summary Stats')
bootstrap = pn.template.BootstrapTemplate(title='Minecraft Summary Stats', theme=pn.template.bootstrap.BootstrapDarkTheme)

md = pn.pane.Markdown("""
This dashboard summarizes Minecraft Java Edition 
statistics based on local saved data. This data aincludes saved worlds on a local
machine, and the Minecraft log files. As of Spring 2023, this is a work in progress
and the plots and layouts are in flux.

Visit the [GitHub page](https://github.com/CulleyHarrelson/MinecraftStats) to
create a version based on your local system.

""")

hardcore_count = world_types[world_types['hardcore'] == 1].count()['hardcore']
creative_count = world_types[world_types['game_type'] == 1].count()['game_type']
world_count = minecraft_stats['world_name'].nunique()
survival_count = world_count - hardcore_count - creative_count

# show parse error count from this
# parse_error_summary

user_count = pn.widgets.StaticText(name='Accounts', value=minecraft_log['User Name'].nunique())
session_count = pn.widgets.StaticText(name='Survival Mode', value=survival_count)
hardcore_count = pn.widgets.StaticText(name='Hardcore Mode', value=hardcore_count)
creative_count = pn.widgets.StaticText(name='Creative Mode', value=creative_count)

bootstrap.sidebar.append(pn.Spacer(height=10))
bootstrap.sidebar.append(md)
bootstrap.sidebar.append(user_count)
bootstrap.sidebar.append(session_count)
bootstrap.sidebar.append(creative_count)
bootstrap.sidebar.append(hardcore_count)
bootstrap.main.append(pn.Spacer(height=10))
bootstrap.main.append(log_in_times_plot)
bootstrap.main.append(pn.Spacer(height=20))
bootstrap.main.append(wood_type_plot)

bootstrap.save(filename='MinecraftStats.html')
