Space for an intro and the like


In [1]:
# Install darts library
!pip install darts

Collecting darts
  Downloading darts-0.30.0-py3-none-any.whl.metadata (52 kB)
     ---------------------------------------- 0.0/52.2 kB ? eta -:--:--
     -------------- ----------------------- 20.5/52.2 kB 330.3 kB/s eta 0:00:01
     -------------------------------------- 52.2/52.2 kB 677.4 kB/s eta 0:00:00
Collecting holidays>=0.11.1 (from darts)
  Downloading holidays-0.54-py3-none-any.whl.metadata (23 kB)
Collecting nfoursid>=1.0.0 (from darts)
  Downloading nfoursid-1.0.1-py3-none-any.whl.metadata (1.9 kB)
Collecting pmdarima>=1.8.0 (from darts)
  Downloading pmdarima-2.0.4-cp310-cp310-win_amd64.whl.metadata (8.0 kB)
Collecting pyod>=0.9.5 (from darts)
  Downloading pyod-2.0.1.tar.gz (163 kB)
     ---------------------------------------- 0.0/163.8 kB ? eta -:--:--
     ------------------- ------------------- 81.9/163.8 kB 2.3 MB/s eta 0:00:01
     -------------------------------------- 163.8/163.8 kB 2.5 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadat

In [15]:
import pandas as pd
from pathlib import Path
from darts import TimeSeries
import numpy as np
import pickle

# Directory containing the pickled datasets loaded below. The path is relative,
# so it is resolved against the current working directory of the kernel.
PICKLES_PATH = Path('pickles')

def load_pickle(file_path):
    """Return the object deserialized from the pickle file at ``file_path``.

    The file is opened in binary read mode and is closed again as soon as the
    object has been loaded.

    NOTE: unpickling can execute arbitrary code, so only call this on files
    that come from a trusted source.
    """
    with open(file_path, mode='rb') as pickle_file:
        loaded_obj = pickle.load(pickle_file)
    return loaded_obj

# Read every pickled dataset from PICKLES_PATH, grouped by game

# -- ARK
ark_players = load_pickle(PICKLES_PATH.joinpath('ark_players.pkl'))
ark_reviews = load_pickle(PICKLES_PATH.joinpath('ark_reviews.pkl'))
ark_updates = load_pickle(PICKLES_PATH.joinpath('ark_updates_analysis.pkl'))

# -- The Isle
isle_merged = load_pickle(PICKLES_PATH.joinpath('isle_merged.pkl'))
isle_reviews = load_pickle(PICKLES_PATH.joinpath('isle_reviews.pkl'))

# -- No Man's Sky
nms_players = load_pickle(PICKLES_PATH.joinpath('noman_players.pkl'))
nms_reviews = load_pickle(PICKLES_PATH.joinpath('nms_reviews.pkl'))
nms_updates = load_pickle(PICKLES_PATH.joinpath('nms_updates_analysis.pkl'))

In [16]:
# Inspect each loaded dataset before merging them into a single dataset for Darts
def explore_data(data, name, level=0):
    """Print a human-readable structural summary of ``data`` to stdout.

    DataFrames, dicts, lists and NumPy arrays each get a dedicated report;
    any other object is reported by its type and value. Dict values that are
    themselves a DataFrame, dict, list or ndarray are explored recursively,
    one nesting level deeper.

    Parameters
    ----------
    data : object
        The object to summarise.
    name : str
        Label shown in the section header.
    level : int, default 0
        Nesting depth; every level indents the report by two spaces.

    Returns
    -------
    None
        The summary is only printed.
    """
    indent = "  " * level
    rule = '=' * 50

    def print_block(obj):
        # Multi-line reprs (frames, Series, 2-D arrays) are indented line by
        # line so nested reports line up under their header. At level 0 the
        # indent is empty, which makes this exactly ``print(obj)``.
        print(indent + str(obj).replace("\n", "\n" + indent))

    print(f"\n{indent}{rule}")
    print(f"{indent}{name} Data Exploration")
    print(f"{indent}{rule}")

    if isinstance(data, pd.DataFrame):
        print(f"{indent}Type: pandas DataFrame")
        print(f"{indent}Shape: {data.shape}")
        print(f"{indent}Columns:")
        for col in data.columns:
            print(f"{indent}- {col}")
        print(f"{indent}Data Types:")
        print_block(data.dtypes)
        print(f"{indent}First few rows:")
        print_block(data.head())
        print(f"{indent}Descriptive Statistics:")
        if data.shape[1] == 0:
            # describe() raises ValueError on a frame without columns; report
            # that instead of aborting the whole exploration.
            print(f"{indent}(no columns to describe)")
        else:
            print_block(data.describe(include='all'))

    elif isinstance(data, dict):
        print(f"{indent}Type: Dictionary")
        print(f"{indent}Number of keys: {len(data)}")
        print(f"{indent}Keys:")
        for key, value in data.items():
            print(f"{indent}- {key}")
            if isinstance(value, (pd.DataFrame, dict, list, np.ndarray)):
                # Containers get their own nested report.
                explore_data(value, f"{name} - {key}", level + 1)
            else:
                print(f"{indent}  Value: {value}")

    elif isinstance(data, list):
        print(f"{indent}Type: List")
        print(f"{indent}Length: {len(data)}")
        if len(data) > 0:
            print(f"{indent}First element type: {type(data[0])}")
            if len(data) > 5:
                print(f"{indent}First 5 elements: {data[:5]}")
            else:
                print(f"{indent}All elements: {data}")

    elif isinstance(data, np.ndarray):
        print(f"{indent}Type: NumPy Array")
        print(f"{indent}Shape: {data.shape}")
        print(f"{indent}Data Type: {data.dtype}")
        # A 0-d array holds a single value, so it is shown rather than being
        # reported as too large.
        if data.ndim == 0 or (data.ndim == 1 and len(data) <= 10):
            print(f"{indent}Values: {data}")
        elif data.ndim == 2 and data.shape[0] <= 5 and data.shape[1] <= 5:
            print(f"{indent}Values:")
            print_block(data)
        else:
            print(f"{indent}Array is too large to display fully.")

    else:
        print(f"{indent}Type: {type(data)}")
        print(f"{indent}Value: {data}")

    print(f"{indent}{rule}\n")

# Print a structural overview of every loaded dataset, one game at a time
print("Exploring datasets before merging:")

# Each entry pairs a section heading with the (dataset, label) items to report.
exploration_plan = [
    ("\nARK Datasets:", [
        (ark_players, "ARK Players"),
        (ark_reviews, "ARK Reviews"),
        (ark_updates, "ARK Updates"),
    ]),
    ("\nThe Isle Datasets:", [
        (isle_merged, "The Isle Merged"),
        (isle_reviews, "The Isle Reviews"),
    ]),
    ("\nNo Man's Sky Datasets:", [
        (nms_players, "No Man's Sky Players"),
        (nms_reviews, "No Man's Sky Reviews"),
        (nms_updates, "No Man's Sky Updates"),
    ]),
]

for section_heading, section_items in exploration_plan:
    print(section_heading)
    for dataset_obj, dataset_label in section_items:
        explore_data(dataset_obj, dataset_label)

print("\nExploration of individual datasets complete.")

Exploring datasets before merging:

ARK Datasets:

ARK Players Data Exploration
Type: pandas DataFrame
Shape: (110, 3)
Columns:
- Month
- Avg Players
- Peak Players
Data Types:
Month           period[M]
Avg Players       float64
Peak Players        int64
dtype: object
First few rows:
     Month  Avg Players  Peak Players
0  2024-06     23631.25         39597
1  2024-05     21754.11         38243
2  2024-04     22072.68         38737
3  2024-03     21597.90         36713
4  2024-02     21550.53         37022
Descriptive Statistics:
          Month   Avg Players   Peak Players
count       110    110.000000     110.000000
unique      110           NaN            NaN
top     2024-06           NaN            NaN
freq          1           NaN            NaN
mean        NaN  43300.954818   75048.872727
std         NaN  13757.041360   28037.720389
min         NaN      5.280000      28.000000
25%         NaN  35413.797500   61141.750000
50%         NaN  42081.220000   72137.000000
75%         N