In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
# Read the ARK update log from the project's data folder into a DataFrame.
ark_update_data = pd.read_csv(
    filepath_or_buffer='data/ark/ark_updates_collection.csv',
)

# Preview the top five records to sanity-check the parsed columns.
print(ark_update_data.head(n=5))

         Date  Version  Num_Changes  \
0  2023-05-04  v357.18            2   
1  2023-05-04  v357.17            2   
2  2023-04-26  v357.15            1   
3  2023-04-14  v357.14            1   
4  2023-04-12  v357.13            1   

                                             Changes Category  
0  Added a command line unofficial admins can opt...    Minor  
1  Fixed multiple exploits v357.17 - 05/04/2023 -...    Minor  
2   v357.15 - 04/26/2023 - Minor version for servers    Minor  
3   v357.14 - 04/14/2023 - Minor version for servers    Minor  
4   v357.13 - 04/12/2023 - Minor version for servers    Minor  


In [3]:
# Prepare the raw update log for analysis: parse dates, derive numeric version
# components, and put the rows in chronological order.

# Convert the 'Date' column to datetime
ark_update_data['Date'] = pd.to_datetime(ark_update_data['Date'], format='%Y-%m-%d')

# Extract major and minor version numbers.
# The patterns are raw strings so that '\d' and '\.' reach the regex engine
# untouched; in a plain string literal they are invalid escape sequences that
# newer Python versions warn about. expand=False makes str.extract return a
# Series (single capture group) rather than a one-column DataFrame.
ark_update_data['Major_Version'] = (
    ark_update_data['Version'].str.extract(r'v(\d+)', expand=False).astype(float)
)
ark_update_data['Minor_Version'] = (
    ark_update_data['Version'].str.extract(r'v\d+\.(\d+)', expand=False).astype(float)
)

# Fill NaN values in Minor_Version with 0 (rows where no '.<digits>' part matched)
ark_update_data['Minor_Version'] = ark_update_data['Minor_Version'].fillna(0)

# Create a combined version number, e.g. v298.31 -> 298.31.
# NOTE(review): dividing by 100 assumes minor versions stay below 100; a larger
# minor value would spill into the major part - confirm against the data.
ark_update_data['Combined_Version'] = ark_update_data['Major_Version'] + ark_update_data['Minor_Version'] / 100

# Sort the dataframe by date. Major/minor version act as tie-breakers so that
# several releases published on the same date always come out in ascending
# version order instead of an unspecified order.
ark_update_data = ark_update_data.sort_values(['Date', 'Major_Version', 'Minor_Version'])

# Reset the index so row labels follow the chronological order
ark_update_data = ark_update_data.reset_index(drop=True)

print(ark_update_data.head())

        Date  Version  Num_Changes  \
0 2019-08-08   v298.3            3   
1 2019-08-23  v298.31            4   
2 2019-08-27  v298.37            3   
3 2019-09-16  v298.40           14   
4 2019-09-18  v298.41            3   

                                             Changes Category  Major_Version  \
0  Added -pvedisallowtribewar flag Added 'cheat t...   Before          298.0   
1  Set beer barrels to check for enemy foundation...    Minor          298.0   
2  Fixed a server crash when using the grapple ho...   Before          298.0   
3  Fixed an exploit with Tek sword dash and zipli...   Before          298.0   
4  Added HLN-A Explorer Notes 1-5 for Genesis Chr...   Before          298.0   

   Minor_Version  Combined_Version  
0            3.0            298.03  
1           31.0            298.31  
2           37.0            298.37  
3           40.0            298.40  
4           41.0            298.41  


In [4]:
# Summarise every major version in one row: the first and last release date,
# the number of releases recorded under it, and the summed change count.
# Named aggregation labels each output column directly, so no separate
# column-renaming step is needed.
ark_grouped_df = (
    ark_update_data
    .groupby('Major_Version')
    .agg(
        Start_Date=('Date', 'min'),
        End_Date=('Date', 'max'),
        Minor_Versions=('Minor_Version', 'count'),
        Total_Changes=('Num_Changes', 'sum'),
    )
    .reset_index()
)

print(ark_grouped_df)

    Major_Version Start_Date   End_Date  Minor_Versions  Total_Changes
0           298.0 2019-08-08 2019-09-18               5             27
1           299.0 2019-10-10 2019-10-10               2              5
2           300.0 2019-10-22 2019-11-05               6             11
3           301.0 2019-11-08 2019-11-08               1              2
4           302.0 2019-11-19 2019-12-04               7             52
5           303.0 2019-12-11 2019-12-11               1             11
6           304.0 2019-12-17 2020-02-06              22             57
7           305.0 2020-02-11 2020-02-11               1              0
8           306.0 2020-02-26 2020-03-05               3             26
9           307.0 2020-03-17 2020-03-24               5             39
10          309.0 2020-03-26 2020-04-03               4             30
11          310.0 2020-04-07 2020-06-10              19             70
12          311.0 2020-06-11 2020-06-19              12             36
13    

In [6]:
# Inspect the releases whose major version is above 400: list them, look for
# lower-numbered releases published inside the same date window, and show the
# updates immediately before and after that window.

# Columns shown in every listing below.
report_columns = ['Date', 'Version', 'Category', 'Changes']

# Major versions above 400 are the ones treated as anomalous.
anomalous_versions = ark_grouped_df.loc[ark_grouped_df['Major_Version'] > 400]

# First and last release date covered by those versions.
anomalous_start = anomalous_versions['Start_Date'].min()
anomalous_end = anomalous_versions['End_Date'].max()

# Reusable row masks over the full update log.
update_date = ark_update_data['Date']
major_version = ark_update_data['Major_Version']
inside_window = (update_date >= anomalous_start) & (update_date <= anomalous_end)

# Anomalous releases themselves.
print("Change categories for anomalous versions:")
anomalous_changes = ark_update_data.loc[(major_version > 400) & inside_window]
print(anomalous_changes[report_columns])

# Regular (<= 400) releases that were published during the same window.
overlapping_updates = ark_update_data.loc[(major_version <= 400) & inside_window]

if overlapping_updates.empty:
    print("\nNo overlapping updates found during this period.")
else:
    print("\nOverlapping updates during the same period:")
    print(overlapping_updates[report_columns])

# Context: updates shortly before and after the window.
buffer_days = 7  # Adjust this to look further before/after if needed
buffer_span = pd.Timedelta(days=buffer_days)

print(f"\nUpdates within {buffer_days} days before the anomalous period:")
starts_in_lead_in = update_date >= anomalous_start - buffer_span
before_anomaly = ark_update_data.loc[starts_in_lead_in & (update_date < anomalous_start)]
print(before_anomaly[report_columns])

print(f"\nUpdates within {buffer_days} days after the anomalous period:")
ends_in_follow_up = update_date <= anomalous_end + buffer_span
after_anomaly = ark_update_data.loc[(update_date > anomalous_end) & ends_in_follow_up]
print(after_anomaly[report_columns])

Change categories for anomalous versions:
          Date  Version Category  \
211 2021-08-31  v678.10   Before   
212 2021-09-02  v678.13   Before   
213 2021-09-08  v678.20   Before   
214 2021-09-10  v678.30   Before   
215 2021-09-10  v678.32   Before   
216 2021-09-10  v678.37   Before   
217 2021-09-14  v678.45   Before   
218 2021-09-15  v678.46   Before   
219 2021-09-17  v678.51   Before   

                                               Changes  
211  Fixed some crashes Fixed some cases where Miss...  
212  Fixed a bug where Mini-HLNA emotes didn't play...  
213       Fixed multiple exploits v678.20 - 09/08/2021  
214                               v678.30 - 09/10/2021  
215                               v678.32 - 09/10/2021  
216  Refactored TEK Dedicated Storage. This is a re...  
217                               v678.45 - 09/14/2021  
218  Fixed a bug where Explorer Notes would not unl...  
219  Fixed a bug where pressing "Spacebar" to bring...  

No overlapping updates fou