# Generate .csv files to visualize in Gephi

In [8]:
import os
import os.path as osp
from datetime import datetime, timezone

import numpy as np
import pandas as pd

### Use the pandas package to read the edge-list .csv file

In [31]:
# Project root: the parent of the directory that osp.abspath('') resolves to
# (i.e. the current working directory).
PROJ_DIR = osp.dirname(osp.abspath(''))

# Raw edge list, located under the project root.
f_edge = f"{PROJ_DIR}/tgb/datasets/tgbl_flight/tgbl-flight_edgelist_v2.csv"

# Read every edge into memory and preview the first rows.
df_e = pd.read_csv(f_edge)
df_e.head()

Unnamed: 0,timestamp,src,dst,callsign,typecode
0,1546318800,YMML,LFPG,HVN19,
1,1546318800,YMML,LEBL,CCA839,
2,1546318800,YSSY,EDDF,CES219,A332
3,1546318800,LEMD,LEMD,AEA040,A332
4,1546318800,YSSY,LFPG,CXA825,B788


### Specify the number of edges to sample

In [42]:
# Number of edges to draw for the sample.
num_sampl = 10000
# Seed the draw so that a Restart & Run All reproduces the same rows.
df_e_sampl = df_e.sample(num_sampl, random_state=42)

### Convert Unix timestamps to datetime

In [33]:
def ts_to_date(ts):
    """Return the UTC year and month of a Unix timestamp as a 'YYYY-MM' string.

    Args:
        ts: Seconds since the Unix epoch; any value accepted by ``int()``.

    Returns:
        str: The month containing ``ts``, formatted with ``'%Y-%m'``.
    """
    # Interpret the timestamp in UTC. Without an explicit tz, fromtimestamp()
    # uses the machine's local zone, so a timestamp close to a month boundary
    # would land in different months on different machines.
    return datetime.fromtimestamp(int(ts), tz=timezone.utc).strftime('%Y-%m')

# testing
ts_to_date(1658203200)

'2022-07'

In [34]:
# Floor every edge timestamp to the first second of its UTC month and store it
# back as Unix seconds in a new "datetime" column.
#
# This is done with NumPy datetime64 unit casts instead of a per-row Python
# call plus a string round-trip: it is vectorised, it does not depend on the
# machine's local time zone, and it does not assume a particular datetime64
# resolution or platform integer width when converting back to seconds.
epoch_s = df_e["timestamp"].to_numpy(dtype="int64")
# seconds -> month precision truncates each value to the start of its month
month_start = epoch_s.astype("datetime64[s]").astype("datetime64[M]")
# month -> seconds -> plain int64 gives the epoch second of that month start
df_e["datetime"] = month_start.astype("datetime64[s]").astype("int64")

df_e.head()

Unnamed: 0,timestamp,src,dst,callsign,typecode,datetime
0,1546318800,YMML,LFPG,HVN19,,1546300800
1,1546318800,YMML,LEBL,CCA839,,1546300800
2,1546318800,YSSY,EDDF,CES219,A332,1546300800
3,1546318800,LEMD,LEMD,AEA040,A332,1546300800
4,1546318800,YSSY,LFPG,CXA825,B788,1546300800


### Create a new dataframe containing only the necessary columns, to prevent memory blow-up when running Gephi

In [40]:
# Slim edge table: only the two endpoints (under the headers "Source" and
# "Target") and the month-level "datetime" column are carried over.
data = dict(
    Source=df_e["src"],
    Target=df_e["dst"],
    datetime=df_e["datetime"],
)
new_df = pd.DataFrame(data)

In [36]:
# Destination of the edge table that will be imported into Gephi.
f_edge_out_full = PROJ_DIR + "/tgb/datasets/tgbl_flight/tgbl-flight_edgelist_v2_out_full.csv"
# Number of edges written to the output file.
num_sampl = 200000

# Seed the draw so the exported file is identical on every run; the index is
# omitted so the CSV holds only the Source/Target/datetime columns.
new_df.sample(num_sampl, random_state=42).to_csv(f_edge_out_full, index=False)

### Use the pandas package to read the node file, which contains individual node information

In [37]:
# Per-airport feature file, located under the project root.
f_node = f"{PROJ_DIR}/tgb/datasets/tgbl_flight/airport_node_feat_v2.csv"

# Read the node table and preview the first rows.
df_n = pd.read_csv(f_node)
df_n.head()

Unnamed: 0,airport_code,type,continent,iso_region,longitude,latitude
0,00A,heliport,,US-PA,-74.933601,40.070801
1,00AA,small_airport,,US-KS,-101.473911,38.704022
2,00AK,small_airport,,US-AK,-151.695999,59.9492
3,00AL,small_airport,,US-AL,-86.770302,34.864799
4,00AR,closed,,US-AR,-91.254898,35.6087


In [38]:
# Node table for export: the airport code becomes the "Id" column and the
# geographic attributes are carried over. The coordinate headers are spelled
# "longitude" / "latitude" (previously misspelt as "longitue" / "latitue").
data = {"Id": df_n.airport_code,
        "continent": df_n.continent,
        "iso_region": df_n.iso_region,
        "longitude": df_n.longitude,
        "latitude": df_n.latitude,
}

df_n_out = pd.DataFrame(data)
f_node_out = PROJ_DIR + "/tgb/datasets/tgbl_flight/airport_node_feat_v2_out_full.csv"
# Missing values are written as the literal "NA"; the index is omitted.
# NOTE(review): continent is NaN for the US rows in the preview, so "NA" here
# presumably restores the North America code -- but it also labels genuinely
# missing values in the other columns as "NA". Confirm against the raw file.
df_n_out.to_csv(f_node_out, index=False, na_rep="NA")