# You Only Need to Run the Bottom code snippet

remove the stop information in GasTech

In [3]:
import pandas as pd

df = pd.read_csv('../data/stops.csv')

df = df[df['location'] != 'Gastech']

df.to_csv('stops_filtered.csv', index=False)

print("The data has been stored in stops_filtered.csv")


The data has been stored in stops_filtered.csv


find the overlap

In [24]:


df = pd.read_csv('stops_filtered.csv')

df['arrival'] = pd.to_datetime(df['arrival'])
df['departure'] = pd.to_datetime(df['departure'])


result = pd.DataFrame(columns=['person1', 'person2', 'meeting_times', 'meeting_information'])

unique_locations = df['location'].unique()

for location in unique_locations:
    df_location = df[df['location'] == location]
    for i in range(len(df_location)):
        for j in range(i+1, len(df_location)):
            person1 = df_location.iloc[i]
            person2 = df_location.iloc[j]
            
            if person1['departure'] > person2['arrival'] and person1['arrival'] < person2['departure']:
                meeting_start = max(person1['arrival'], person2['arrival'])
                meeting_end = min(person1['departure'], person2['departure'])
                meeting_duration = meeting_end - meeting_start
                
                person1_name = person1['Name']
                person2_name = person2['Name']
                
                if person1_name > person2_name:
                    person1_name, person2_name = person2_name, person1_name
                
                meeting_info = (
                    location,
                    meeting_start.strftime('%Y-%m-%d %H:%M:%S'),
                    meeting_end.strftime('%Y-%m-%d %H:%M:%S'),
                    str(meeting_duration)
                )
                
                if ((result['person1'] == person1_name) & (result['person2'] == person2_name)).any():
                    idx = result[(result['person1'] == person1_name) & (result['person2'] == person2_name)].index[0]
                    result.at[idx, 'meeting_times'] += 1
                    result.at[idx, 'meeting_information'].append(meeting_info)
                else:
                    new_row = pd.DataFrame([{
                        'person1': person1_name,
                        'person2': person2_name,
                        'meeting_times': 1,
                        'meeting_information': [meeting_info]
                    }])
                    result = pd.concat([result, new_row], ignore_index=True)


result['meeting_information'] = result['meeting_information'].apply(lambda x: str([f'("{i[0]}", "{i[1]}", "{i[2]}", "{i[3]}")' for i in x]))


result.to_csv('meeting_data111.csv', index=False)

print("process complete, data in meeting_data.csv")


KeyboardInterrupt: 

# Run this snippet!
visualize the relationship network

In [4]:
from dash import Dash, html, dcc, Input, Output
import dash_cytoscape as cyto
import ast

df = pd.read_csv('../data/meeting_data.csv')

app = Dash(__name__)


def get_elements(min_meeting_times, max_meeting_times, min_meeting_duration_seconds, max_meeting_duration_seconds):
    nodes = []
    edges = []
    
    for _, row in df.iterrows():
        person1 = row['person1']
        person2 = row['person2']
        meeting_times = row['meeting_times']
        meetings = ast.literal_eval(row['meeting_information'])
        
        if meeting_times < min_meeting_times or meeting_times > max_meeting_times:
            continue
        
        filtered_meetings = []
        for meeting in meetings:
            try:
                meeting = meeting.strip('()').split(', ')
                if len(meeting) != 4:
                    raise ValueError("Invalid meeting format")
                
                location, start, end, duration = [item.strip('"') for item in meeting]
                duration_parts = list(map(int, duration.split(':')))
                meeting_duration_seconds = duration_parts[0] * 3600 + duration_parts[1] * 60 + duration_parts[2]
            except Exception as e:
                print(f"Error processing meeting duration: {e} - Meeting: {meeting}")
                continue

            if min_meeting_duration_seconds <= meeting_duration_seconds <= max_meeting_duration_seconds:
                filtered_meetings.append((location, start, end, duration))
        
        # filter the meeting time
        if not filtered_meetings:
            continue
        
        if not any(node['data']['id'] == person1 for node in nodes):
            nodes.append({'data': {'id': person1, 'label': person1}})
        if not any(node['data']['id'] == person2 for node in nodes):
            nodes.append({'data': {'id': person2, 'label': person2}})
        
        edge_data = {
            'data': {
                'source': person1,
                'target': person2,
                'meetings': filtered_meetings
            }
        }
        
        if not any(edge['data']['source'] == person1 and edge['data']['target'] == person2 for edge in edges):
            edges.append(edge_data)
    
    return nodes + edges

app.layout = html.Div([
    html.Div([
        dcc.RangeSlider(
            id='meeting-times-slider',
            min=1,
            max=df['meeting_times'].max(),
            value=[1, df['meeting_times'].max()],
            marks={i: str(i) for i in range(1, df['meeting_times'].max() + 1)},
            step=1,
        ),
        html.Label('Meeting Times Range')
    ]),
    html.Div([
        dcc.RangeSlider(
            id='meeting-duration-slider',
            min=0,
            max=24 * 60,  
            value=[0, 24 * 60],
            marks={**{i: (str(i) if i < 60 else f'{i // 60}h') for i in range(0, 61, 10)},
                   **{i: f'{i // 60}h' for i in range(60, 24 * 60 + 1, 60)}},
            step=5,
        ),
        html.Label('Meeting Duration Range (minutes)')
    ]),
    cyto.Cytoscape(
        id='cytoscape',
        elements=get_elements(1, df['meeting_times'].max(), 0, 10 * 60), 
        style={'width': '100%', 'height': '600px'},
        layout={'name': 'circle', 'radius': 700}, 
        stylesheet=[
            {
                'selector': 'node',
                'style': {
                    'label': 'data(label)'
                }
            }
        ]
    ),
    html.Div(id='output')
])

@app.callback(
    Output('cytoscape', 'elements'),
    Input('meeting-times-slider', 'value'),
    Input('meeting-duration-slider', 'value')
)
def update_elements(meeting_times_range, meeting_duration_range):
    min_meeting_times, max_meeting_times = meeting_times_range
    min_meeting_duration_seconds = meeting_duration_range[0] * 60  
    max_meeting_duration_seconds = meeting_duration_range[1] * 60  
    elements = get_elements(min_meeting_times, max_meeting_times, min_meeting_duration_seconds, max_meeting_duration_seconds)
    print(f"Elements: {elements}")  
    return elements

@app.callback(
    Output('output', 'children'),
    Input('cytoscape', 'tapEdge')
)
def display_edge_data(edge_data):
    if edge_data:
        meetings = edge_data['data']['meetings']
        meeting_info = [f"Location: {m[0]}, Start: {m[1]}, End: {m[2]}, Duration: {m[3]}" for m in meetings]
        return html.Div([
            html.H4(f"Meetings between {edge_data['data']['source']} and {edge_data['data']['target']}"),
            html.Ul([html.Li(info) for info in meeting_info])
        ])
    return "Click on an edge to see meeting details."

if __name__ == '__main__':
    app.run_server(debug=True, port=8079)


Elements: [{'data': {'id': 'Lucas Alcazar', 'label': 'Lucas Alcazar'}}, {'data': {'id': 'Nils Calixto', 'label': 'Nils Calixto'}}, {'data': {'id': 'Hideki Cocinaro', 'label': 'Hideki Cocinaro'}}, {'data': {'id': 'Lidelse Dedos', 'label': 'Lidelse Dedos'}}, {'data': {'id': 'Birgitta Frente', 'label': 'Birgitta Frente'}}, {'data': {'id': 'Marin Onda', 'label': 'Marin Onda'}}, {'data': {'id': 'Kare Orilla', 'label': 'Kare Orilla'}}, {'data': {'id': 'Hennie Osvaldo', 'label': 'Hennie Osvaldo'}}, {'data': {'id': 'Sten Sanjorge Jr.', 'label': 'Sten Sanjorge Jr.'}}, {'data': {'id': 'Gustav Cazar', 'label': 'Gustav Cazar'}}, {'data': {'id': 'Orhan Strum', 'label': 'Orhan Strum'}}, {'data': {'id': 'Edvard Vann', 'label': 'Edvard Vann'}}, {'data': {'id': 'Lars Azada', 'label': 'Lars Azada'}}, {'data': {'id': 'Axel Calzas', 'label': 'Axel Calzas'}}, {'data': {'id': 'Vira Frente', 'label': 'Vira Frente'}}, {'data': {'id': 'Minke Mies', 'label': 'Minke Mies'}}, {'data': {'id': 'Ingrid Barranco', 'l