In [1]:
#Approach One - log into ripe-atlas and retrieve status at timestamp in experiment data 
import os

import pandas as pd
import requests

In [2]:
# Load the traceroute experiment data used by every approach below.
# NOTE(review): unpickling can execute arbitrary code -- only load
# 'traceroute.pickle' if it comes from a trusted source.
experiment_df = pd.read_pickle('traceroute.pickle')

In [3]:
def check_probe_connection(row, api_key=None, timeout=10.0):
    """Query RIPEstat for one measurement row and classify the probe's status.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'from'`` (probe IP), ``'prb_id'`` and ``'timestamp'``.
    api_key : str, optional
        Value for the ``X-Api-Key`` header. When omitted, the
        ``RIPE_ATLAS_API_KEY`` environment variable is used; if that is unset
        as well, the request is sent without the header.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up, so that one
        stalled request cannot hang the whole loop over measurements.

    Returns
    -------
    list of dict
        A single-element list holding ``{'Probe ID', 'Status', 'Message'}``.
        ``Status`` is ``'Connected'`` when the response lists at least one
        probe, ``'Disconnected'`` when the list is empty and ``'Error'`` when
        the request failed or the response could not be interpreted.
    """
    probe_ip = row['from']
    probe_id = row['prb_id']
    timestamp = row['timestamp']

    def _result(status, message=''):
        # Keep the original return shape: a list containing one record.
        return [{'Probe ID': probe_id, 'Status': status, 'Message': message}]

    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    # Credentials must not be committed with the notebook; take the key from
    # the caller or the environment instead.
    # NOTE(review): the key that used to be hard-coded here is in the file
    # history and should be revoked.
    if api_key is None:
        api_key = os.environ.get('RIPE_ATLAS_API_KEY')
    if api_key:
        headers['X-Api-Key'] = api_key

    # Let requests build and escape the query string (safe for IPv6 literals).
    # NOTE(review): confirm that this endpoint honours `probe_id` and
    # `timestamp`; the classification below only checks whether any probe is
    # returned for the query.
    url = "https://stat.ripe.net/data/atlas-probes/data.json"
    params = {
        'probe_id': probe_id,
        'timestamp': timestamp,
        'resource': probe_ip,
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return _result('Error', str(e))

    if not isinstance(data, dict):
        return _result('Error', 'Unexpected response: top-level JSON is not an object')
    if 'error' in data:
        return _result('Error', data['error'])

    payload = data.get('data')
    probes = payload.get('probes') if isinstance(payload, dict) else None
    if probes is None:
        return _result('Error', 'Unexpected response: no data.probes field')

    return _result('Connected' if probes else 'Disconnected')


In [4]:
# Distinct index labels of experiment_df (several rows can share one label);
# the cell displays how many there are.
indices = experiment_df.index.unique()
len(indices)

151651

In [5]:
# Check the probe status for the first 20 index labels only.
# All rows sharing a label are looked up, and the first of them is the one
# passed to check_probe_connection.
Status_list = []

for idx in indices[:20]:
    first_row = experiment_df.loc[experiment_df.index == idx].iloc[0]
    Status_list.append(check_probe_connection(first_row))

In [6]:
# Each entry of Status_list is the list returned by check_probe_connection
# (one record per call). Flatten those lists and build the table directly,
# instead of creating a column of dicts, expanding it row by row with
# apply(pd.Series) and dropping the helper column in place.
result_df = pd.DataFrame(
    [record for status in Status_list for record in status],
    columns=['Probe ID', 'Status', 'Message'],
)
result_df

Unnamed: 0,Probe ID,Status,Message
0,15118,Connected,
1,33627,Connected,
2,51265,Connected,
3,55787,Connected,
4,61357,Connected,
5,15118,Connected,
6,33627,Connected,
7,51265,Connected,
8,55787,Connected,
9,61357,Connected,


In [7]:
#Approach two - download the connection logs from ripe-atlas and use them to deduce the connection status of probes 
#when experiment was carried out 

import json

#function for loading one probe's connection log and keeping its from/to intervals
def process_data(json_file_name, probe_id):
    """Read a JSON connection log and return its intervals keyed by probe.

    Parameters
    ----------
    json_file_name : str
        Path of a JSON file holding a sequence of objects that each have a
        ``'from'`` and a ``'to'`` entry.
    probe_id
        Key under which the intervals are stored in the result.

    Returns
    -------
    dict
        ``{probe_id: [{'from': ..., 'to': ...}, ...]}`` in file order; any
        other fields of the log entries are discarded.
    """
    with open(json_file_name, 'r') as handle:
        entries = json.load(handle)

    intervals = [{'from': entry['from'], 'to': entry['to']} for entry in entries]
    return {probe_id: intervals}

In [8]:

# Probes whose connection logs are available as '<probe id>.json'
probe_id_list = [33627, 51265, 55787, 61357, 15118]

# One {probe_id: [intervals]} mapping per probe (a list, despite the name)
combined_dict = []

for server in probe_id_list:
    # The log file is named after the probe id
    combined_dict.append(process_data(f"{server}.json", server))

In [9]:
# Merge the per-probe mappings into a single {probe_id: [intervals]} lookup
uptime_dict = {}
for per_probe in combined_dict:
    uptime_dict.update(per_probe)

In [10]:
# Function to classify a timestamp against a probe's logged connection intervals
def is_probe_up(probe_id, timestamp):
    """Return "connected" if `timestamp` falls inside any interval of the probe.

    Intervals are read from the module-level `uptime_dict`. An interval whose
    'to' is None is treated as still open, so only its 'from' bound is
    checked; otherwise both bounds are inclusive. Probes that are absent from
    `uptime_dict` are reported as "disconnected".
    """
    for interval in uptime_dict.get(probe_id, []):
        end = interval['to']
        start = interval['from']
        if start <= timestamp and (end is None or timestamp <= end):
            return "connected"
    return "disconnected"

# Label every measurement row with the probe's connection state at that row's
# timestamp. Only the two columns that is_probe_up needs are iterated, which
# avoids building a full row Series per record as DataFrame.apply(axis=1)
# does on this wide frame.
experiment_df['probe_status'] = [
    is_probe_up(probe_id, timestamp)
    for probe_id, timestamp in zip(experiment_df['prb_id'], experiment_df['timestamp'])
]


In [11]:
# Rows of probe 55787 whose new_time is greater than '2023-06-01'
from_probe_55787 = experiment_df['prb_id'] == 55787
after_cutoff = experiment_df['new_time'] > '2023-06-01'
outlier = experiment_df[from_probe_55787 & after_cutoff]
outlier

Unnamed: 0,fw,mver,lts,endtime,dst_name,dst_addr,src_addr,proto,af,size,...,dst_names,src_names,Hour,distance,hop,hop_ip,rtt,unique_ips,avg_rtt,probe_status
3778,5080,2.6.2,23,1685572755,81.34.11.86,81.34.11.86,192.168.1.62,ICMP,4,48,...,Madrid,src_Barcelona2,0,498.438652,1,"[192.168.1.1, 192.168.1.1, 192.168.1.1]","[1.203, 0.965, 0.929]",[192.168.1.1],1.032333,connected
3778,5080,2.6.2,23,1685572755,81.34.11.86,81.34.11.86,192.168.1.62,ICMP,4,48,...,Madrid,src_Barcelona2,0,498.438652,2,"[81.46.38.140, 81.46.38.140, 81.46.38.140]","[3.054, 2.799, 2.826]",[81.46.38.140],2.893000,connected
3778,5080,2.6.2,23,1685572755,81.34.11.86,81.34.11.86,192.168.1.62,ICMP,4,48,...,Madrid,src_Barcelona2,0,498.438652,3,"[81.46.34.69, 81.46.34.69, 81.46.34.69]","[3.172, 3.054, 3.173]",[81.46.34.69],3.133000,connected
3778,5080,2.6.2,23,1685572755,81.34.11.86,81.34.11.86,192.168.1.62,ICMP,4,48,...,Madrid,src_Barcelona2,0,498.438652,4,"[None, None, None]","[nan, nan, nan]",[None],,connected
3778,5080,2.6.2,23,1685572755,81.34.11.86,81.34.11.86,192.168.1.62,ICMP,4,48,...,Madrid,src_Barcelona2,0,498.438652,5,"[None, None, None]","[nan, nan, nan]",[None],,connected
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128861,5080,2.6.2,23,1687831432,79.145.127.11,79.145.127.11,192.168.1.62,ICMP,4,48,...,Malaga,src_Barcelona2,4,784.918801,5,"[80.58.96.149, 80.58.96.149, 80.58.96.149]","[20.931, 20.944, 20.715]",[80.58.96.149],20.863333,connected
128861,5080,2.6.2,23,1687831432,79.145.127.11,79.145.127.11,192.168.1.62,ICMP,4,48,...,Malaga,src_Barcelona2,4,784.918801,6,"[81.46.5.46, 81.46.5.46, 81.46.5.46]","[20.578, 20.404, 20.676]",[81.46.5.46],20.552667,connected
128861,5080,2.6.2,23,1687831432,79.145.127.11,79.145.127.11,192.168.1.62,ICMP,4,48,...,Malaga,src_Barcelona2,4,784.918801,7,"[81.41.220.245, 81.41.220.245, 81.41.220.245]","[19.523, 19.432, 19.747]",[81.41.220.245],19.567333,connected
128861,5080,2.6.2,23,1687831432,79.145.127.11,79.145.127.11,192.168.1.62,ICMP,4,48,...,Malaga,src_Barcelona2,4,784.918801,8,"[None, None, None]","[nan, nan, nan]",[None],,connected


In [12]:
# Rows of probe 15118, with new_time up to '2023-08-01', that approach two
# labelled as disconnected
from_probe_15118 = experiment_df['prb_id'] == 15118
up_to_cutoff = experiment_df['new_time'] <= '2023-08-01'
flagged_down = experiment_df['probe_status'] == 'disconnected'
status = experiment_df[from_probe_15118 & up_to_cutoff & flagged_down]
status.tail(10)

Unnamed: 0,fw,mver,lts,endtime,dst_name,dst_addr,src_addr,proto,af,size,...,dst_names,src_names,Hour,distance,hop,hop_ip,rtt,unique_ips,avg_rtt,probe_status
66171,5080,2.6.2,339,1686613666,83.43.248.213,83.43.248.213,192.168.192.157,ICMP,4,48,...,Dst_Barcelona2,src_Madrid,1,497.866606,7,"[81.46.44.14, 81.46.44.14, 81.46.44.14]","[11.532, 11.678, 11.754]",[81.46.44.14],11.654667,disconnected
66171,5080,2.6.2,339,1686613666,83.43.248.213,83.43.248.213,192.168.192.157,ICMP,4,48,...,Dst_Barcelona2,src_Madrid,1,497.866606,8,"[83.43.248.213, 83.43.248.213, 83.43.248.213]","[13.353, 12.483, 12.894]",[83.43.248.213],12.91,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,1,"[192.168.192.1, 192.168.192.1, 192.168.192.1]","[0.476, 0.421, 0.412]",[192.168.192.1],0.436333,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,2,"[192.168.144.1, 192.168.144.1, 192.168.144.1]","[0.937, 1.714, 1.691]",[192.168.144.1],1.447333,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,3,"[81.46.64.245, 81.46.64.245, 81.46.64.245]","[0.869, 1.716, 1.733]",[81.46.64.245],1.439333,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,4,"[None, None, None]","[nan, nan, nan]",[None],,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,5,"[None, None, None]","[nan, nan, nan]",[None],,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,6,"[81.46.44.50, 81.46.44.50, 81.46.44.50]","[9.489, 9.579, 15.823]",[81.46.44.50],11.630333,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,7,"[81.46.34.218, 81.46.34.218, 81.46.34.218]","[12.018, 9.501, 9.729]",[81.46.34.218],10.416,disconnected
96501,5080,2.6.2,316,1686613643,83.51.50.60,83.51.50.60,192.168.192.157,ICMP,4,48,...,Dst_Barcelona,src_Madrid,1,500.367133,8,"[83.51.50.60, 83.51.50.60, 83.51.50.60]","[12.1, 11.596, 11.755]",[83.51.50.60],11.817,disconnected


# Probe Resource

## Parameters
- **address_v4 (string)**
  - The last IPv4 address that was known to be held by this probe, or null if there is no known address. Note: a probe that connects over IPv6 may fail to report its IPv4 address, meaning that this field can sometimes be null even though the probe may have a working IPv4.

- **address_v6 (string)**
  - The last IPv6 address that was known to be held by this probe, or null if there is no known address.

- **asn_v4 (integer: int64)**
  - The IPv4 ASN if any.

- **asn_v6 (integer: int64)**
  - The IPv6 ASN if any.

- **country_code (string)**
  - An ISO-3166-1 alpha-2 code indicating the country that this probe is located in, as derived from the user supplied longitude and latitude.

- **description (string)**
  - User-defined description of the probe.

- **first_connected (string: date-time)**
  - When the probe connected for the first time (UTC Time and date in ISO-8601/ECMA 262 format).

- **geometry (object: GeoJSON Point)**
  - A GeoJSON point object containing the user-supplied location of this probe. The longitude and latitude are contained, in that order, within the `coordinates` array.

- **id (integer: int64)**
  - The ID of the probe.

- **is_anchor (boolean)**
  - Whether or not this probe is a RIPE Atlas Anchor.

- **is_public (boolean)**
  - If a probe is not public, then certain details, including exact IP addresses, are not returned.

- **last_connected (string: date-time)**
  - When the probe connected for the last time (UTC Time and date in ISO-8601/ECMA 262 format).

- **prefix_v4 (string)**
  - The IPv4 prefix if any.

- **prefix_v6 (string)**
  - The IPv6 prefix if any.

- **status (object)**
  - A JSON object containing:
    - **id:** The connection status ID for this probe (integer [0-3]).
    - **name:** The connection status (string [Never Connected, Connected, Disconnected, Abandoned]).
    - **since:** The datetime of the last change in connection status.

- **tags (array)**
  - The tags attached to this probe; each entry is an object with a `name` and a `slug`.

- **total_uptime (integer: int64)**
  - Accumulated uptime for this probe in seconds.

- **type (string)**
  - The type of the object.


In [13]:

#Approach three - Use the ripe-atlas Probe Object 
from ripe.atlas.cousteau import Probe

# IDs of the RIPE Atlas probes to describe
probe_id_list = [33627, 51265, 55787, 61357, 15118, 26072, 30381, 15632, 1004200, 14866]

# NOTE(review): `columns` is not referenced again in this cell, and its names
# do not all match the keys built below -- confirm whether it is still needed.
columns = ["ProbeID", "ASN", "CountryCode", "IPAddress", "Status", "Latitude", "Longitude",
           "Tags", "Public", "Status_since", "Description", "Prefix_v4"]

# One dict per probe; turned into a DataFrame in a single step after the loop
probe_list = []

for probe_id in probe_id_list:
    probe = Probe(id=probe_id, is_oneoff=False)
    meta = probe.meta_data
    # The code reads index 0 as longitude and index 1 as latitude
    coordinates = probe.geometry['coordinates']

    row_data = dict(
        ProbeID=probe.id,
        ASN=probe.asn_v4,
        CountryCode=probe.country_code,
        IPAddress=probe.address_v4,
        Status=probe.status,
        Anchor=probe.is_anchor,
        Latitude=coordinates[1],
        Longitude=coordinates[0],
        Tags=probe.tags,
        Public=probe.is_public,
        Since=meta.get('status_since'),
        Uptime=meta.get('total_uptime'),
        FirstConnected=meta.get('first_connected'),
        Prefix_v4=probe.prefix_v4,
        LastConnected=meta.get('last_connected'),
    )
    probe_list.append(row_data)

# Build the probe table from the collected rows
probe_df = pd.DataFrame(probe_list)
    

In [14]:
#convert to ordinary time 
from datetime import datetime
def convert_time(datetime_column, tz=None):
    """Convert POSIX timestamps to `datetime` objects.

    Parameters
    ----------
    datetime_column : iterable
        Timestamps in seconds since the epoch. Missing entries (None or NaN)
        are passed through as None instead of raising.
    tz : datetime.tzinfo, optional
        Target timezone. With the default (None) the result is a naive
        datetime in the machine's *local* timezone, so the values depend on
        where the notebook runs; pass `datetime.timezone.utc` for
        reproducible, timezone-aware UTC values.

    Returns
    -------
    list
        One entry per input element, in the same order.
    """
    converted = []
    for stamp in datetime_column:
        # `stamp != stamp` is only true for NaN
        if stamp is None or stamp != stamp:
            converted.append(None)
        else:
            converted.append(datetime.fromtimestamp(stamp, tz))
    return converted

In [15]:
# Replace the raw epoch values in the three time columns with datetimes.
# NOTE(review): the columns are overwritten, so this cell is not safe to run twice.
for time_col in ('Since', 'FirstConnected', 'LastConnected'):
    probe_df[time_col] = convert_time(probe_df[time_col])

In [16]:
#convert the uptime of probes to days
def convert_uptime(uptime_column):
    """Return each value of `uptime_column` divided by 3600 and then by 24.

    For uptimes given in seconds this yields days. The result is a plain
    list in input order.
    """
    return [(seconds / 3600) / 24 for seconds in uptime_column]

# Derived column: the 'Uptime' values converted by convert_uptime
probe_df['Uptime(days)'] = convert_uptime(probe_df['Uptime'])


In [17]:
# Display the probe table assembled in the cells above
probe_df

Unnamed: 0,ProbeID,ASN,CountryCode,IPAddress,Status,Anchor,Latitude,Longitude,Tags,Public,Since,Uptime,FirstConnected,Prefix_v4,LastConnected,Uptime(days)
0,33627,3352,ES,79.154.53.175,Connected,False,41.4005,2.1885,"[{'name': 'Fibre', 'slug': 'fibre'}, {'name': ...",True,2023-11-30 04:32:26,168902923,2018-07-23 14:01:57,79.154.0.0/16,2024-01-22 11:38:52,1954.894942
1,51265,3352,ES,79.150.158.173,Connected,False,39.8495,3.0905,"[{'name': 'Fibre', 'slug': 'fibre'}, {'name': ...",True,2023-10-23 18:12:41,152839978,2019-02-26 22:56:59,79.150.0.0/16,2024-01-22 11:38:53,1768.981227
2,55787,3352,ES,79.152.111.181,Disconnected,False,41.4105,2.1615,"[{'name': 'system: IPv4 Capable', 'slug': 'sys...",True,2023-06-27 05:08:52,78580047,2020-05-27 10:13:03,79.152.0.0/16,2023-06-27 05:08:52,909.491285
3,61357,3352,ES,83.53.163.145,Connected,False,43.3105,-5.7005,"[{'name': 'Fibre', 'slug': 'fibre'}, {'name': ...",True,2023-12-12 15:16:39,33315064,2023-01-01 17:15:49,83.53.0.0/16,2024-01-22 11:38:53,385.591019
4,15118,3352,ES,80.39.150.99,Connected,False,40.4515,-3.6725,"[{'name': 'Home', 'slug': 'home'}, {'name': 'N...",True,2024-01-05 12:24:15,257420551,2014-03-17 23:03:05,80.39.0.0/16,2024-01-22 11:38:54,2979.404525
5,26072,3352,ES,79.145.122.135,Connected,False,36.6185,-4.5095,"[{'name': 'Fibre', 'slug': 'fibre'}, {'name': ...",True,2024-01-15 12:30:29,216841444,2016-05-05 13:43:55,79.145.0.0/16,2024-01-22 11:38:54,2509.738935
6,30381,3352,ES,83.51.63.181,Connected,False,41.3775,2.1415,"[{'name': 'Fibre', 'slug': 'fibre'}, {'name': ...",True,2024-01-05 12:24:25,115923593,2017-05-09 16:41:50,83.51.0.0/16,2024-01-22 11:38:54,1341.708252
7,15632,3352,ES,83.42.100.54,Connected,False,41.4685,2.0885,"[{'name': 'system: V3', 'slug': 'system-v3'}, ...",True,2024-01-05 12:27:12,303171205,2014-04-02 00:26:44,83.42.0.0/16,2024-01-22 11:38:54,3508.925984
8,1004200,3352,ES,88.24.229.106,Connected,False,28.0275,-15.5025,"[{'name': 'Fibre', 'slug': 'fibre'}, {'name': ...",True,2023-11-14 19:14:57,52902878,2022-05-18 22:27:33,88.24.0.0/16,2024-01-22 11:38:55,612.301829
9,14866,3352,ES,81.35.25.21,Connected,False,40.5405,-3.6495,"[{'name': 'system: V3', 'slug': 'system-v3'}, ...",True,2023-12-12 15:25:29,262814372,2014-05-21 21:06:01,81.35.0.0/16,2024-01-22 11:38:55,3041.833009


In [18]:
# Save the full probe table, including the 'Tags' and 'Uptime(days)' columns,
# to 'probe_data.pickle' (path is relative to the working directory)
probe_df.to_pickle('probe_data.pickle')

In [19]:
# Export view: probe_df without the tag list and the derived uptime-in-days
# column; probe_df itself is left unchanged
cols_drop = ['Tags', 'Uptime(days)']
export_df = probe_df.drop(columns=cols_drop)