# Dependency of Lifetime and the Indicator Environment

In [1]:
# Package import
import numpy as np
import pandas as pd 
import json
import math 
from datetime import datetime, timedelta
from IPython.display import HTML, display
import matplotlib.pyplot as plt
import seaborn as sns
from msticpy.vis import mp_pandas_plot
from msticpy.vis.data_viewer import DataViewer
from scipy import stats
from sklearn.model_selection import train_test_split

# Disable warnings
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")

## General Malware Indicator Dataset

In [2]:
# Remove all rows without a last seen
urlData = pd.read_csv('.\Data\Threat_Fox\\threat_fox_urls.csv',sep=',', skipinitialspace=True)
urlData = urlData.loc[urlData["last_seen_utc"].notna()]

ipData = pd.read_csv('.\Data\Threat_Fox\\threat_fox_ip.csv',sep=',', skipinitialspace=True)
ipData = ipData.loc[ipData["last_seen_utc"].notna()]

hashData = pd.read_csv('.\Data\Threat_Fox\\threat_fox_md5.csv',sep=',', skipinitialspace=True)
hashData = hashData.loc[hashData["last_seen_utc"].notna()]

In [3]:
# Remove all rows where Indicator is younger then one year
present = datetime.now()
past = timedelta(days=31)
year = present - past

urlData["last_seen_utc"] = urlData["last_seen_utc"].apply(lambda x: pd.to_datetime(x))
urlData["first_seen_utc"] = urlData["first_seen_utc"].apply(lambda x: pd.to_datetime(x))
urlData = urlData[urlData["last_seen_utc"] < year]
urlData = urlData[urlData["last_seen_utc"] != urlData["first_seen_utc"]]

ipData["last_seen_utc"] = ipData["last_seen_utc"].apply(lambda x: pd.to_datetime(x))
ipData["first_seen_utc"] = ipData["first_seen_utc"].apply(lambda x: pd.to_datetime(x))
ipData = ipData[ipData["last_seen_utc"] < year]
ipData = ipData[ipData["last_seen_utc"] != ipData["first_seen_utc"]]

hashData["last_seen_utc"] = hashData["last_seen_utc"].apply(lambda x: pd.to_datetime(x))
hashData["first_seen_utc"] = hashData["first_seen_utc"].apply(lambda x: pd.to_datetime(x))
hashData = hashData[hashData["last_seen_utc"] < year]
hashData = hashData[hashData["last_seen_utc"] != hashData["first_seen_utc"]]

In [4]:
# Calculate lifetime and delete negative values
urlData['lifetimeHours'] = (urlData["last_seen_utc"] - urlData["first_seen_utc"]) / pd.Timedelta(hours=1)
urlData = urlData[urlData['lifetimeHours'] >= 0].dropna()

ipData['lifetimeHours'] = (ipData["last_seen_utc"] - ipData["first_seen_utc"]) / pd.Timedelta(hours=1)
ipData = ipData[ipData['lifetimeHours'] >= 0].dropna()

hashData['lifetimeHours'] = (hashData["last_seen_utc"] - hashData["first_seen_utc"]) / pd.Timedelta(hours=1)
hashData = hashData[hashData['lifetimeHours'] >= 0].dropna()

In [5]:
fullData = pd.concat([urlData, hashData, ipData], axis=0)
fullData.shape[0]

127448

### Percentile

In [6]:
# Display information
average = [["10% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 10))], 
           ["20% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 20))], 
           ["30% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 30))],
           ["40% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 40))], 
           ["50% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 50))],
           ["60% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 60))],
           ["70% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 70))],
           ["80% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 80))],
           ["90% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 90))],
           ["100% Lifetime",timedelta(hours=np.percentile(fullData["lifetimeHours"], 100))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,0:55:48
20% Lifetime,2:44:00
30% Lifetime,4:30:02
40% Lifetime,7:39:17
50% Lifetime,18:49:52
60% Lifetime,"3 days, 13:25:04"
70% Lifetime,"3 days, 22:22:17.800000"
80% Lifetime,"12 days, 0:00:31"
90% Lifetime,"102 days, 23:54:52.700000"
100% Lifetime,"1364 days, 6:11:49"


In [7]:
# Split1 = 50/50
split1_50_train, split1_50_test = train_test_split(fullData, test_size=0.5)

# Split1 = 70/30
split2_30_train, split2_70_test = train_test_split(fullData, test_size=0.7)

# Split1 = 30/70
split3_70_train, split3_30_test = train_test_split(fullData, test_size=0.3)

### Split 1

In [8]:
# Display information
average = [["10% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 10))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 10)))], 
           ["20% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 20))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 20)))], 
           ["30% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 30))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 30)))],
           ["40% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 40))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 40)))], 
           ["50% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 50))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 50)))],
           ["60% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 60))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 60)))],
           ["70% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 70))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 70)))],
           ["80% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 80))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 80)))],
           ["90% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 90))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 90)))],
           ["100% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 100))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 100)))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,Train: 0:55:48 Test: 0:55:47
20% Lifetime,Train: 2:43:40 Test: 2:44:55.400000
30% Lifetime,Train: 4:30:08 Test: 4:29:51.900000
40% Lifetime,Train: 7:39:25.200000 Test: 7:39:11
50% Lifetime,Train: 18:29:46 Test: 19:00:04.500000
60% Lifetime,"Train: 3 days, 13:19:18.600000 Test: 3 days, 13:27:31.800000"
70% Lifetime,"Train: 3 days, 22:08:46.900000 Test: 3 days, 22:31:39"
80% Lifetime,"Train: 11 days, 17:03:21 Test: 12 days, 6:40:12.200000"
90% Lifetime,"Train: 98 days, 22:05:39 Test: 107 days, 0:07:48.800000"
100% Lifetime,"Train: 1364 days, 6:11:49 Test: 1364 days, 6:11:49"


In [9]:
# Display information
tf_1 = ((timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 10)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 10))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 20)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 20))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 30)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 30))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 40)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 40))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 50)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 50))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 60)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 60))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 70)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 70))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 80)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 80))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 90)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 90))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeHours"], 100)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 100)))) / 10
print(tf_1)

-1 day, 3:07:53.110000


### Split 2

In [10]:
# Display information
average = [["10% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 10))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 10)))], 
           ["20% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 20))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 20)))], 
           ["30% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 30))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 30)))],
           ["40% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 40))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 40)))], 
           ["50% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 50))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 50)))],
           ["60% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 60))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 60)))],
           ["70% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 70))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 70)))],
           ["80% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 80))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 80)))],
           ["90% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 90))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 90)))],
           ["100% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 100))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 100)))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,Train: 0:55:58 Test: 0:55:46
20% Lifetime,Train: 2:42:24 Test: 2:44:36.200000
30% Lifetime,Train: 4:29:44 Test: 4:30:06
40% Lifetime,Train: 7:38:46.800000 Test: 7:39:27
50% Lifetime,Train: 19:30:32.500000 Test: 18:04:36.500000
60% Lifetime,"Train: 3 days, 13:26:06.800000 Test: 3 days, 13:24:51.800000"
70% Lifetime,"Train: 3 days, 20:55:02.100000 Test: 3 days, 22:31:46.100000"
80% Lifetime,"Train: 11 days, 19:18:07.400000 Test: 12 days, 1:13:20.200000"
90% Lifetime,"Train: 101 days, 14:23:10.700000 Test: 103 days, 12:51:15.500000"
100% Lifetime,"Train: 1364 days, 6:11:49 Test: 1364 days, 6:11:49"


In [11]:
# Display information
tf_2 = ((timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 10)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 10))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 20)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 20))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 30)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 30))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 40)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 40))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 50)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 50))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 60)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 60))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 70)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 70))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 80)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 80))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 90)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 90))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeHours"], 100)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 100)))) / 10
print(tf_2)

-1 day, 18:44:24.700000


### Split 3

In [12]:
# Display information
average = [["10% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 10))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 10)))], 
           ["20% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 20))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 20)))], 
           ["30% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 30))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 30)))],
           ["40% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 40))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 40)))], 
           ["50% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 50))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 50)))],
           ["60% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 60))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 60)))],
           ["70% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 70))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 70)))],
           ["80% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 80))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 80)))],
           ["90% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 90))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 90)))],
           ["100% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 100))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 100)))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,Train: 0:55:45 Test: 0:56:00
20% Lifetime,Train: 2:43:08.800000 Test: 2:45:48
30% Lifetime,Train: 4:29:45 Test: 4:30:27
40% Lifetime,Train: 7:37:47 Test: 7:40:16.600000
50% Lifetime,Train: 17:58:18 Test: 19:57:07
60% Lifetime,"Train: 3 days, 13:24:56.200000 Test: 3 days, 13:25:14"
70% Lifetime,"Train: 3 days, 22:10:56.800000 Test: 3 days, 22:31:43.800000"
80% Lifetime,"Train: 12 days, 3:53:27.800000 Test: 11 days, 12:12:17"
90% Lifetime,"Train: 103 days, 16:24:41.600000 Test: 100 days, 18:19:13"
100% Lifetime,"Train: 1364 days, 6:11:49 Test: 1364 days, 6:11:49"


In [13]:
# Display information
tf_3 = ((timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 10)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 10))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 20)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 20))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 30)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 30))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 40)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 40))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 50)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 50))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 60)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 60))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 70)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 70))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 80)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 80))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 90)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 90))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeHours"], 100)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 100)))) / 10
print(tf_3)

8:20:03.980000


## URL APT Phishing Indicator Dataset

In [14]:
customData = pd.read_csv(".\Data\\Phishing\VT_Submissions_Metadata.csv", sep=",", encoding='windows-1252')
# Separat sightings
customData["sightings"] = customData["sightings"].str.split("#")

# Remove all rows where the indicator is younger than 1 month
present = datetime.now()
past = timedelta(days=31)
year = present - past

customData["Last Submission"] = customData["Last Submission"].apply(lambda x: pd.to_datetime(x))
customData["First Submission"] = customData["First Submission"].apply(lambda x: pd.to_datetime(x))
customData["lifetimeInHours"] = (customData["Last Submission"] - customData["First Submission"]) / pd.Timedelta(hours=1)
customData = customData[customData["Last Submission"] < year]
customData = customData[customData["Last Submission"] != customData["First Submission"]]
customData = customData[customData['lifetimeInHours'] >= 0].dropna()
customData = customData[customData['Malicious'] != 0]

### Percentile

In [15]:
# Display information
average = [["10% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 10))], 
           ["20% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 20))], 
           ["30% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 30))],
           ["40% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 40))], 
           ["50% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 50))],
           ["60% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 60))],
           ["70% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 70))],
           ["80% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 80))],
           ["90% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 90))],
           ["100% Lifetime",timedelta(hours=np.percentile(customData["lifetimeInHours"], 100))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,"114 days, 6:00:50.800000"
20% Lifetime,"239 days, 16:20:21.200000"
30% Lifetime,"788 days, 2:36:05.800000"
40% Lifetime,"1007 days, 19:47:17"
50% Lifetime,"1269 days, 8:32:23"
60% Lifetime,"1591 days, 0:40:39"
70% Lifetime,"1732 days, 18:35:30.600000"
80% Lifetime,"2191 days, 10:55:37"
90% Lifetime,"2574 days, 2:44:31.200000"
100% Lifetime,"3267 days, 20:53:01"


In [16]:
# Split1 = 50/50
split1_50_train, split1_50_test = train_test_split(customData, test_size=0.5)

# Split1 = 70/30
split2_30_train, split2_70_test = train_test_split(customData, test_size=0.7)

# Split1 = 30/70
split3_70_train, split3_30_test = train_test_split(customData, test_size=0.3)

### Split 1

In [17]:
# Display information
average = [["10% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 10))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 10)))], 
           ["20% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 20))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 20)))], 
           ["30% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 30))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 30)))],
           ["40% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 40))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 40)))], 
           ["50% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 50))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 50)))],
           ["60% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 60))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 60)))],
           ["70% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 70))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 70)))],
           ["80% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 80))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 80)))],
           ["90% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 90))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 90)))],
           ["100% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 100))) + " Test: " +  str(timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 100)))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,"Train: 114 days, 5:59:47.400000 Test: 114 days, 12:29:07.500000"
20% Lifetime,"Train: 215 days, 10:05:20.400000 Test: 244 days, 7:46:52.800000"
30% Lifetime,"Train: 759 days, 14:56:03.600000 Test: 817 days, 3:02:04.600000"
40% Lifetime,"Train: 1004 days, 12:40:59.400000 Test: 1018 days, 7:39:41.200000"
50% Lifetime,"Train: 1278 days, 12:02:54 Test: 1267 days, 12:21:30.500000"
60% Lifetime,"Train: 1590 days, 1:45:40.800000 Test: 1593 days, 16:19:46.200000"
70% Lifetime,"Train: 1696 days, 18:41:46.800000 Test: 1736 days, 9:58:29.300000"
80% Lifetime,"Train: 2072 days, 7:22:23.800000 Test: 2234 days, 19:18:10.800000"
90% Lifetime,"Train: 2461 days, 8:56:51.600000 Test: 2673 days, 4:40:46.500000"
100% Lifetime,"Train: 3207 days, 16:26:02 Test: 3267 days, 20:53:01"


In [18]:
# Display information
average = [["10% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 10)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 10))], 
           ["20% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 20)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 20))], 
           ["30% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 30)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 30))],
           ["40% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 40)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 40))], 
           ["50% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 50)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 50))],
           ["60% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 60)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 60))],
           ["70% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 70)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 70))],
           ["80% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 80)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 80))],
           ["90% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 90)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 90))],
           ["100% Lifetime",timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 100)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 100))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,"-1 day, 23:58:43.400000"
20% Lifetime,"-36 days, 10:00:24.400000"
30% Lifetime,"-93 days, 7:19:56.300000"
40% Lifetime,"-31 days, 14:02:26.800000"
50% Lifetime,"-23 days, 9:35:30.500000"
60% Lifetime,"60 days, 22:43:35.200000"
70% Lifetime,"94 days, 13:16:27.400000"
80% Lifetime,"217 days, 18:55:19"
90% Lifetime,"149 days, 15:45:20"
100% Lifetime,"-9 days, 19:48:11"


In [19]:
# Display information
vt_1 = ((timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 10)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 10))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 20)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 20))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 30)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 30))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 40)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 40))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 50)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 50))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 60)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 60))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 70)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 70))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 80)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 80))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 90)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 90))) +
(timedelta(hours=np.percentile(split1_50_train["lifetimeInHours"], 100)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeInHours"], 100)))) / 10
print(vt_1)

-57 days, 6:38:49.940000


### Split 2

In [20]:
# Display information
average = [["10% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 10))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 10)))], 
           ["20% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 20))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 20)))], 
           ["30% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 30))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 30)))],
           ["40% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 40))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 40)))], 
           ["50% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 50))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 50)))],
           ["60% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 60))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 60)))],
           ["70% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 70))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 70)))],
           ["80% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 80))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 80)))],
           ["90% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 90))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 90)))],
           ["100% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 100))) + " Test: " +  str(timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 100)))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,"Train: 114 days, 5:59:58.200000 Test: 114 days, 6:01:14.800000"
20% Lifetime,"Train: 215 days, 10:04:59 Test: 251 days, 0:04:34.600000"
30% Lifetime,"Train: 726 days, 12:12:15.600000 Test: 819 days, 4:52:19.300000"
40% Lifetime,"Train: 994 days, 11:03:34 Test: 1024 days, 21:01:07.200000"
50% Lifetime,"Train: 1269 days, 8:32:23 Test: 1291 days, 22:56:52.500000"
60% Lifetime,"Train: 1650 days, 21:15:59.600000 Test: 1589 days, 22:32:24.400000"
70% Lifetime,"Train: 1799 days, 2:23:46 Test: 1704 days, 13:07:18.600000"
80% Lifetime,"Train: 2288 days, 1:20:10.400000 Test: 2070 days, 6:24:51.400000"
90% Lifetime,"Train: 2667 days, 8:38:52.200000 Test: 2517 days, 16:53:32.200000"
100% Lifetime,"Train: 3259 days, 16:41:12 Test: 3267 days, 20:53:01"


In [21]:
vt_2 = ((timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 10)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 10))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 20)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 20))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 30)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 30))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 40)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 40))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 50)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 50))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 60)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 60))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 70)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 70))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 80)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 80))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 90)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 90))) +
(timedelta(hours=np.percentile(split2_30_train["lifetimeInHours"], 100)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeInHours"], 100)))) / 10
print(vt_2)

33 days, 8:20:35.400000


### Split 3

In [22]:
# Display information
average = [["10% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 10))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 10)))], 
           ["20% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 20))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 20)))], 
           ["30% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 30))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 30)))],
           ["40% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 40))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 40)))], 
           ["50% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 50))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 50)))],
           ["60% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 60))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 60)))],
           ["70% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 70))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 70)))],
           ["80% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 80))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 80)))],
           ["90% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 90))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 90)))],
           ["100% Lifetime","Train: " +  str(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 100))) + " Test: " +  str(timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 100)))]]

display(HTML(
   "<table><tr>{}</tr></table>".format(
       "</tr><tr>".join(
           "<td>{}</td>".format("</td><td>".join(str(_) for _ in row)) for row in average))
))

0,1
10% Lifetime,"Train: 114 days, 10:14:47 Test: 114 days, 6:00:07.700000"
20% Lifetime,"Train: 255 days, 21:50:49.800000 Test: 215 days, 10:02:12.400000"
30% Lifetime,"Train: 820 days, 2:34:15.400000 Test: 258 days, 18:04:15.600000"
40% Lifetime,"Train: 1091 days, 1:21:10.800000 Test: 882 days, 0:37:29.600000"
50% Lifetime,"Train: 1365 days, 17:27:19 Test: 1213 days, 8:46:47.500000"
60% Lifetime,"Train: 1660 days, 18:10:27.200000 Test: 1472 days, 0:08:19"
70% Lifetime,"Train: 1755 days, 12:13:10 Test: 1639 days, 13:10:36.500000"
80% Lifetime,"Train: 2216 days, 7:56:11.600000 Test: 2068 days, 8:58:58.800000"
90% Lifetime,"Train: 2666 days, 20:09:43.200000 Test: 2505 days, 9:05:45.100000"
100% Lifetime,"Train: 3267 days, 20:53:01 Test: 2981 days, 3:03:13"


In [23]:
# Display information
vt_3 = ((timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 10)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 10))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 20)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 20))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 30)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 30))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 40)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 40))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 50)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 50))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 60)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 60))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 70)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 70))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 80)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 80))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 90)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 90))) +
(timedelta(hours=np.percentile(split3_70_train["lifetimeInHours"], 100)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeInHours"], 100)))) / 10
print(vt_3)

186 days, 10:17:18.980000


## Overall

In [24]:
print((tf_1 + tf_2 + tf_3 + vt_1 + vt_2 + vt_3)/6)

27 days, 1:14:51.018333


In [25]:
print((tf_1 + tf_2 + tf_3)/3)

-1 day, 18:04:07.263333


In [26]:
print((vt_1 + vt_2 + vt_3)/3)

54 days, 8:25:34.773333


In [27]:
# Split1 = 50/50
split1_50_train_vt, split1_50_test_vt = train_test_split(customData, test_size=0.5)

# Split1 = 70/30
split2_30_train_vt, split2_70_test_vt = train_test_split(customData, test_size=0.7)

# Split1 = 30/70
split3_70_train_vt, split3_30_test_vt = train_test_split(customData, test_size=0.3)

# Split1 = 50/50
split1_50_train, split1_50_test = train_test_split(fullData, test_size=0.5)

# Split1 = 70/30
split2_30_train, split2_70_test = train_test_split(fullData, test_size=0.7)

# Split1 = 30/70
split3_70_train, split3_30_test = train_test_split(fullData, test_size=0.3)

In [30]:
# Display information
all_1 = ((timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 10)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 10))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 20)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 20))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 30)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 30))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 40)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 40))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 50)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 50))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 60)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 60))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 70)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 70))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 80)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 80))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 90)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 90))) +
(timedelta(hours=np.percentile(split1_50_test_vt["lifetimeInHours"], 100)) -  timedelta(hours=np.percentile(split1_50_test["lifetimeHours"], 100)))) / 10
print(all_1)

1192 days, 14:10:51.710000


In [32]:
# Display information
all_2 = ((timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 10)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 10))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 20)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 20))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 30)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 30))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 40)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 40))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 50)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 50))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 60)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 60))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 70)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 70))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 80)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 80))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 90)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 90))) +
(timedelta(hours=np.percentile(split2_30_train_vt["lifetimeInHours"], 100)) -  timedelta(hours=np.percentile(split2_70_test["lifetimeHours"], 100)))) / 10
print(all_2)

1436 days, 21:45:59.260000


In [33]:
# Display information
all_3 = ((timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 10)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 10))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 20)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 20))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 30)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 30))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 40)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 40))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 50)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 50))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 60)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 60))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 70)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 70))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 80)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 80))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 90)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 90))) +
(timedelta(hours=np.percentile(split3_70_train_vt["lifetimeInHours"], 100)) -  timedelta(hours=np.percentile(split3_30_test["lifetimeHours"], 100)))) / 10
print(all_3)

1295 days, 3:45:07.500000


In [34]:
print((all_1 + all_2 + all_3)/3)

1308 days, 5:13:59.490000
