# JetBrains IDE Release Date Predictions

**From:** https://towardsdatascience.com/forecasting-of-periodic-events-with-ml-5081db493c46


**Summary:** I'll save you soome time from reading - this yielded no useful results. There are going to be releases on Jan 30th in 2023 and 2024. Not helpful.

## 0. Imports

In [None]:
# Data manipulation
import pandas as pd
# Manipulation with dates
from datetime import date
from dateutil.relativedelta import relativedelta
# Machine learning
import xgboost as xgb
from sklearn import metrics
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

  from pandas import MultiIndex, Int64Index


## 1. Get Input Data

In [None]:
from typing import List

import numpy as np
import pandas as pd
import requests
from packaging.version import Version

IDES = [
    "IIU",  # IntelliJ IDEA Ultimate
]


def get_data(ides: List[str]):
    ide_str = "%2C".join(ides)
    url = f'https://data.services.jetbrains.com/products?code={ide_str}&release.type=release'
    return requests.get(url=url).json()


ide = get_data(IDES)[0]
code = ide['code']
name = ide['name']

In [None]:
# Create a DataFrame to store the dates
df = pd.DataFrame.from_dict(ide['releases'])

df['code'] = code
df['name'] = name
for x in ['downloads','type','patches','notesLink','licenseRequired','whatsnew','uninstallFeedbackLinks','printableReleaseType']:
    del df[x]

df['major'] = df['version'].apply(lambda x: Version(x).major)
df['minor'] = df['version'].apply(lambda x: Version(x).minor)
df['micro'] = df['version'].apply(lambda x: Version(x).micro)
df['Date'] = pd.to_datetime(df['date'])
del df['date']
df['Release'] = 1

df.head(3)

Unnamed: 0,version,majorVersion,build,code,name,major,minor,micro,Date,Release
0,2022.1.1,2022.1,221.5591.52,IIU,IntelliJ IDEA Ultimate,2022,1,1,2022-05-11,1
1,2022.1,2022.1,221.5080.210,IIU,IntelliJ IDEA Ultimate,2022,1,0,2022-04-12,1
2,2021.3.3,2021.3,213.7172.25,IIU,IntelliJ IDEA Ultimate,2021,3,3,2022-03-17,1


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 171 entries, 0 to 170
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   version       171 non-null    object        
 1   majorVersion  171 non-null    object        
 2   build         171 non-null    object        
 3   code          171 non-null    object        
 4   name          171 non-null    object        
 5   major         171 non-null    int64         
 6   minor         171 non-null    int64         
 7   micro         171 non-null    int64         
 8   Date          171 non-null    datetime64[ns]
 9   Release       171 non-null    int64         
dtypes: datetime64[ns](1), int64(4), object(5)
memory usage: 13.5+ KB


In [None]:
print(df['Date'].min(), df['Date'].max())

2012-02-02 00:00:00 2022-05-11 00:00:00


In [54]:
data = df[['Date','Release']]
data = data.drop_duplicates()

r = pd.date_range(start=data['Date'].min(), end=data['Date'].max())

data = data.set_index('Date').reindex(r).fillna(0.0).rename_axis('Date').reset_index()

data

Unnamed: 0,Date,Release
0,2012-02-02,1.0
1,2012-02-03,0.0
2,2012-02-04,0.0
3,2012-02-05,0.0
4,2012-02-06,0.0
...,...,...
3747,2022-05-07,0.0
3748,2022-05-08,0.0
3749,2022-05-09,0.0
3750,2022-05-10,0.0


In [55]:
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day
data['Workday_N'] = np.busday_count(
                    data['Date'].values.astype('datetime64[M]'),
                    data['Date'].values.astype('datetime64[D]'))
data['Week_day'] = data['Date'].dt.weekday
data['Week_of_month'] = (data['Date'].dt.day - data['Date'].dt.weekday - 2) // 7 + 2
data['Weekday_order'] = (data['Date'].dt.day + 6) // 7
data = data.set_index('Date')
data

Unnamed: 0_level_0,Release,Month,Day,Workday_N,Week_day,Week_of_month,Weekday_order
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2012-02-02,1.0,2,2,1,3,1,1
2012-02-03,0.0,2,3,2,4,1,1
2012-02-04,0.0,2,4,3,5,1,1
2012-02-05,0.0,2,5,3,6,1,1
2012-02-06,0.0,2,6,3,0,2,1
...,...,...,...,...,...,...,...
2022-05-07,0.0,5,7,5,5,2,1
2022-05-08,0.0,5,8,5,6,2,2
2022-05-09,0.0,5,9,5,0,3,2
2022-05-10,0.0,5,10,6,1,3,2


In [56]:
x_train, x_test, y_train, y_test = train_test_split(
    data.drop(['Release'], axis=1),
    data['Release'],
    test_size=0.3,
    random_state=1,
    shuffle=False,
)

In [57]:
DM_train = xgb.DMatrix(data=x_train, label=y_train)
grid_param = {"learning_rate": [0.01, 0.1],
              "n_estimators": [100, 150, 200],
              "alpha": [0.1, 0.5, 1],
              "max_depth": [2, 3, 4]}
model = xgb.XGBRegressor()
grid_mse = GridSearchCV(estimator=model, param_grid=grid_param,
                       scoring="neg_mean_squared_error",
                       cv=4, verbose=1)
grid_mse.fit(x_train, y_train)
print("Best parameters found: ", grid_mse.best_params_)
print("Lowest RMSE found: ", np.sqrt(np.abs(grid_mse.best_score_)))

  from pandas import MultiIndex, Int64Index
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index


Fitting 4 folds for each of 54 candidates, totalling 216 fits


  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex,

Best parameters found:  {'alpha': 1, 'learning_rate': 0.1, 'max_depth': 2, 'n_estimators': 100}
Lowest RMSE found:  0.20726742139047416


[Parallel(n_jobs=1)]: Done 216 out of 216 | elapsed:   11.7s finished
  from pandas import MultiIndex, Int64Index


In [68]:
xgb_model = xgb.XGBClassifier(
    objective ='reg:squarederror', 
    colsample_bytree = 1, 
    learning_rate = 0.1,
    max_depth = 4, 
    alpha = 0.5, 
    n_estimators = 200,
)
xgb_model.fit(x_train, y_train)
xgb_prediction = xgb_model.predict(x_test)



In [69]:
knn = KNeighborsClassifier(
    n_neighbors = 3,
    algorithm = 'auto',     
    weights = 'distance',
)
knn.fit(x_train, y_train)  
knn_prediction = knn.predict(x_test)

In [70]:
random_forest = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    random_state=1,
)
random_forest.fit(x_train, y_train)
rf_prediction = random_forest.predict(x_test)

In [71]:
xgb_matrix = metrics.confusion_matrix(xgb_prediction, y_test)
print(f"""
Confusion matrix for XGBoost model:
TN:{xgb_matrix[0][0]}    FN:{xgb_matrix[0][1]}
FP:{xgb_matrix[1][0]}    TP:{xgb_matrix[1][1]}""")
knn_matrix = metrics.confusion_matrix(knn_prediction, y_test)
print(f"""
Confusion matrix for KNN model:
TN:{knn_matrix[0][0]}    FN:{knn_matrix[0][1]}
FP:{knn_matrix[1][0]}    TP:{knn_matrix[1][1]}""")
rf_matrix = metrics.confusion_matrix(rf_prediction, y_test)
print(f"""
Confusion matrix for Random Forest model:
TN:{rf_matrix[0][0]}    FN:{rf_matrix[0][1]}
FP:{rf_matrix[1][0]}    TP:{rf_matrix[1][1]}""")


Confusion matrix for XGBoost model:
TN:1081    FN:45
FP:0    TP:0

Confusion matrix for KNN model:
TN:1057    FN:43
FP:24    TP:2

Confusion matrix for Random Forest model:
TN:1079    FN:45
FP:2    TP:0


In [72]:
x_predict = pd.DataFrame(pd.date_range(date.today(), (date.today() +
            relativedelta(years=2)),freq='d'), columns=['Date'])
x_predict['Month'] = x_predict['Date'].dt.month
x_predict['Day'] = x_predict['Date'].dt.day
x_predict['Workday_N'] = np.busday_count(
                x_predict['Date'].values.astype('datetime64[M]'),
                x_predict['Date'].values.astype('datetime64[D]'))
x_predict['Week_day'] = x_predict['Date'].dt.weekday
x_predict['Week_of_month'] = (x_predict['Date'].dt.day - 
                              x_predict['Date'].dt.weekday - 2)//7+2
x_predict['Weekday_order'] = (x_predict['Date'].dt.day + 6) // 7
x_predict = x_predict.set_index('Date')
prediction = xgb_model.predict(x_predict)

  from pandas import MultiIndex, Int64Index


In [76]:
prediction[:10]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [75]:
pd.Series(prediction).value_counts()

0.0    730
1.0      2
dtype: int64

In [81]:
x_predict['p'] = prediction
x_predict[x_predict['p'] == 1]

Unnamed: 0_level_0,Month,Day,Workday_N,Week_day,Week_of_month,Weekday_order,p
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-01-31,1,31,21,1,6,5,1.0
2024-01-30,1,30,21,1,5,5,1.0


In [34]:
data = pd.DataFrame({'Date': ['2021-01-26','2020-12-22',
                     '2020-11-24','2020-10-27','2020-09-29',
                     '2020-08-25','2020-07-28','2020-06-30',
                     '2020-05-26','2020-04-28','2020-03-31',
                     '2020-02-25','2020-01-28','2019-12-31',
                     '2019-11-26','2019-10-29','2019-09-24',
                     '2019-08-27','2019-07-30','2019-06-25',
                     '2019-05-28']})

In [35]:
data['Date'] = pd.to_datetime(data['Date'])
data['Release'] = 1

In [36]:
r = pd.date_range(start=data['Date'].min(), end=data['Date'].max())
data = data.set_index('Date').reindex(r).fillna(0.0).rename_axis('Date').reset_index()

In [37]:
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day
data['Workday_N'] = np.busday_count(
                    data['Date'].values.astype('datetime64[M]'),
                    data['Date'].values.astype('datetime64[D]'))
data['Week_day'] = data['Date'].dt.weekday
data['Week_of_month'] = (data['Date'].dt.day 
                         - data['Date'].dt.weekday - 2) // 7 + 2
data['Weekday_order'] = (data['Date'].dt.day + 6) // 7
data = data.set_index('Date')

In [38]:
x_train, x_test, y_train, y_test = train_test_split(data.drop(['Release'], axis=1), data['Release'],
                 test_size=0.3, random_state=1, shuffle=False)

In [39]:
DM_train = xgb.DMatrix(data=x_train, label=y_train)
grid_param = {"learning_rate": [0.01, 0.1],
              "n_estimators": [100, 150, 200],
              "alpha": [0.1, 0.5, 1],
              "max_depth": [2, 3, 4]}
model = xgb.XGBRegressor()
grid_mse = GridSearchCV(estimator=model, param_grid=grid_param,
                       scoring="neg_mean_squared_error",
                       cv=4, verbose=1)
grid_mse.fit(x_train, y_train)
print("Best parameters found: ", grid_mse.best_params_)
print("Lowest RMSE found: ", np.sqrt(np.abs(grid_mse.best_score_)))

  from pandas import MultiIndex, Int64Index
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index


Fitting 4 folds for each of 54 candidates, totalling 216 fits


  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex,

Best parameters found:  {'alpha': 0.5, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
Lowest RMSE found:  0.0534237785172052


  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
[Parallel(n_jobs=1)]: Done 216 out of 216 | elapsed:    6.3s finished
  from pandas import MultiIndex, Int64Index


In [40]:
xgb_model = xgb.XGBClassifier(objective ='reg:squarederror', 
                            colsample_bytree = 1, 
                            learning_rate = 0.1,
                            max_depth = 4, 
                            alpha = 0.5, 
                            n_estimators = 200)
xgb_model.fit(x_train, y_train)
xgb_prediction = xgb_model.predict(x_test)



In [41]:
knn = KNeighborsClassifier(n_neighbors = 3, algorithm = 'auto',     
                           weights = 'distance') 
knn.fit(x_train, y_train)  
knn_prediction = knn.predict(x_test)

In [42]:
random_forest = RandomForestClassifier(n_estimators=50,
                                       max_depth=10, random_state=1)
random_forest.fit(x_train, y_train)
rf_prediction = random_forest.predict(x_test)

In [43]:
xgb_matrix = metrics.confusion_matrix(xgb_prediction, y_test)
print(f"""
Confusion matrix for XGBoost model:
TN:{xgb_matrix[0][0]}    FN:{xgb_matrix[0][1]}
FP:{xgb_matrix[1][0]}    TP:{xgb_matrix[1][1]}""")
knn_matrix = metrics.confusion_matrix(knn_prediction, y_test)
print(f"""
Confusion matrix for KNN model:
TN:{knn_matrix[0][0]}    FN:{knn_matrix[0][1]}
FP:{knn_matrix[1][0]}    TP:{knn_matrix[1][1]}""")
rf_matrix = metrics.confusion_matrix(rf_prediction, y_test)
print(f"""
Confusion matrix for Random Forest model:
TN:{rf_matrix[0][0]}    FN:{rf_matrix[0][1]}
FP:{rf_matrix[1][0]}    TP:{rf_matrix[1][1]}""")


Confusion matrix for XGBoost model:
TN:175    FN:2
FP:1    TP:5

Confusion matrix for KNN model:
TN:175    FN:7
FP:1    TP:0

Confusion matrix for Random Forest model:
TN:175    FN:2
FP:1    TP:5


In [45]:
x_predict = pd.DataFrame(pd.date_range(date.today(), (date.today() +
            relativedelta(years=1)),freq='d'), columns=['Date'])
x_predict['Month'] = x_predict['Date'].dt.month
x_predict['Day'] = x_predict['Date'].dt.day
x_predict['Workday_N'] = np.busday_count(
                x_predict['Date'].values.astype('datetime64[M]'),
                x_predict['Date'].values.astype('datetime64[D]'))
x_predict['Week_day'] = x_predict['Date'].dt.weekday
x_predict['Week_of_month'] = (x_predict['Date'].dt.day - 
                              x_predict['Date'].dt.weekday - 2)//7+2
x_predict['Weekday_order'] = (x_predict['Date'].dt.day + 6) // 7
x_predict = x_predict.set_index('Date')
prediction = xgb_model.predict(x_predict)

  from pandas import MultiIndex, Int64Index


In [46]:
prediction

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.