In [None]:
# Project 5: Railway Accidents
# Part 4: Create UI and visualising the data

# Objective for this notebook:
#     Data Analysis and User Interface: Create a graphical user interface with Dash/Plotly 
#     that supports the following functionalities:
#     - display a summary of all accident causes in a word cloud
#     - count the numbers of deaths through railway accidents per decade and generate a 
#     time-based line chart that shows the temporal development
#     - for the entries without geo coordinates, enrich the CSV file by geocoding the 
#     corresponding accident location or, if that fails, the corresponding country
#     - plot all accidents on a map with one dot per incident and a color that indicates 
#     the instrument category

In [None]:
# How to run a Dash app in Google Colab
## Requirements
### Install ngrok
### Download the ngrok archive and unpack the binary into the working directory.
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

### Run ngrok to tunnel Dash app port 8050 to the outside world. 
### This command runs in the background.
### The port given here has to be the port the Dash server listens on,
### otherwise the public URL will not reach the app.
get_ipython().system_raw('./ngrok http 8050 &')

### Get the public URL where you can access the Dash app. Copy this URL.
### The URL is read from ngrok's local API on port 4040. ngrok was started in
### the background just above, so if this step prints an error ngrok may not
### be ready yet -- re-run the cell in that case.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

### Install Dash
### The versions are pinned so the notebook installs the same packages on every run.
!pip install dash==0.31.1  # The core dash backend
!pip install dash-html-components==0.13.2  # HTML components
!pip install dash-core-components==0.39.0  # Supercharged components
!pip install dash-table==3.1.7  # Interactive DataTable component (new!)

import pandas as pd
import dash
from dash.dependencies import Input, Output
#from dash import html
#from dash import dcc
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import plotly.graph_objects as go
import random

In [None]:
# This cell is meant for creating functions we need for the word cloud visualisation.

# Extract the "Cause" column of the cleaned dataset and save it as a plain text
# file with one cause per line, so that get_dictionary() can count the causes.
# The file is written to the local working directory: the GitHub raw URL can
# only be read from, and the word-cloud code opens "traintext.txt" locally.
df = pd.read_csv('https://raw.githubusercontent.com/Julardzija/Webscraping-Train-Accidents-Wiki/main/DataForVisualising.csv')
df=df["Cause"]
df.to_csv("traintext.txt", header=False, index=False)

# Create a function that we can use generally for other files. that is why we put path_to_file
def get_dictionary(path_to_file):
    """Count how often each line occurs in a UTF-8 text file.

    Every non-blank line is treated as one accident cause. Surrounding
    whitespace, including the trailing newline, is removed before counting so
    that the same cause always maps to the same key, whether or not it is the
    last line of the file.

    Parameters:
        path_to_file: path of the text file to read (one cause per line).

    Returns:
        dict mapping each cause (str) to the number of lines containing it,
        in order of first appearance.

    Raises:
        OSError: if the file cannot be opened.
    """
    d = {}
    # The with-statement closes the file even if reading fails part-way.
    with open(path_to_file, 'r', encoding='utf-8') as text:
        for line in text:
            cause = line.strip()
            # Lines read from a file keep their "\n", so blank lines can only
            # be detected after stripping.
            if cause == "":
                continue
            d[cause] = d.get(cause, 0) + 1
    return d

# Here we have a function that sets the size and color of each word.
# The number, is how many words we want in our wordcloud.
def getTags(dictionary,number):
    """Build the word-cloud tags for the most frequent entries of a dictionary.

    Parameters:
        dictionary: dict mapping a word to its count.
        number: maximum number of words to return. A value that is None or
            not positive means "no limit" (all words are returned).

    Returns:
        A tuple (words, sizes, colors, original_size) of four lists of equal
        length, ordered from the most to the least frequent word:
        - words: the words themselves,
        - sizes: font sizes scaled linearly so the most frequent word gets 50
          and the others get between 10 and 50,
        - colors: the text color of each word (always "green"),
        - original_size: the unscaled counts.
        All four lists are empty when the dictionary is empty.
    """
    # Most frequent first; sorted() is stable, so ties keep dictionary order.
    words = sorted(dictionary, key=dictionary.get, reverse=True)
    if number is not None and number > 0:
        words = words[:number]

    original_size = [dictionary[w] for w in words]
    # We have chosen to only have the words colored green.
    colors = ["green"] * len(words)

    # Nothing to scale for an empty dictionary (avoids indexing an empty list).
    if not original_size:
        return ([], [], [], [])

    # Scale relative to the largest count, which is first after sorting.
    maxvalue = original_size[0]
    sizes = [count/maxvalue*40 + 10 for count in original_size]
    return (words, sizes, colors, original_size)



In [None]:
# Create the Dash application that hosts the map, the deaths chart and the
# word cloud.
app = dash.Dash(__name__)

# Loading the clean dataset
data = pd.read_csv('https://raw.githubusercontent.com/Julardzija/Webscraping-Train-Accidents-Wiki/main/DataForVisualising.csv', encoding='utf-8')

# Bounds of the time slider: the earliest and the latest accident year.
# They are computed with min()/max() so they do not depend on the rows being
# sorted by year, and converted to plain Python ints for use in range() and
# in the slider properties.
mintime = int(data["year"].min())
maxtime = int(data["year"].max())

# Layout of the Dash app, from top to bottom:
# 1. the page title and the world map of train accidents,
# 2. the title and the line chart of the number of deaths per decade,
# 3. the time slider that selects the range of years shown on the map and in the chart,
# 4. the title and the dropdown that selects how many tags (10, 50, 100 or 200) the word cloud shows,
# 5. the word cloud itself.
app.layout = html.Div([
                 # Page title and world map (figure filled in by the 'train-map' callback).
                 html.H1(children="Train accidents",
                         style = {'textAlign':'center', 'font-family' : 'Roboto'}),        
                 html.Div([
                     html.Div([
                         dcc.Graph(id='train-map')
                     ],style={'width':'90%','display':'inline-block','vertical-align':'top','margin':'2%'}),
                 ]),
                 
                 # Deaths-per-decade line chart.
                 html.H1(children="Number of deaths pr decade",
                         style = {'textAlign':'center', 'font-family' : 'Roboto'}),        
                 html.Div([
                     html.Div([
                         dcc.Graph(id='Deaths_distribution')
                     ],style={'width':'100%','display':'inline-block','vertical-align':'top','margin':'2%'}),
                 ]),                 
                 # Year-range slider; starts with the full range selected and
                 # shows a mark every 10 years.
                 html.Div([
                     dcc.RangeSlider(
                       id='time-slider',
                       min=mintime,
                       max=maxtime,
                       step=1,
                       value=[mintime,maxtime],
                       marks={i: str(i) for i in range(mintime, maxtime, 10)})
                 ]),
                 
                 # Word cloud of accident causes with its tag-count dropdown (default 10).
                 html.H1(children='All accident causes',
                         style = {'textAlign':'center', 'font-family' : 'Roboto'}),
                 html.Div([
                     dcc.Dropdown(
                        id='tags-number',
                        options=[{'label': str(i)+" tags", 'value': i} for i in [10,50,100,200]],
                        value=10,
                        style={'width':'50%','margin':'auto'}
                        )
                ]),
                html.Div([
                        dcc.Graph(id='word-cloud')
                ])
])

# Callback that keeps the world map in sync with the time slider.
# Output: the figure of the 'train-map' graph.
# Input:  the [first year, last year] pair selected on the 'time-slider'.
@app.callback(
    Output(component_id='train-map', component_property='figure'),
    [
        Input(component_id='time-slider', component_property='value')
    ]
)
def update_map(time):
    """Return the accident map for the selected range of years.

    Each accident is drawn at its latitude/longitude. Hovering a dot shows the
    accident title together with its date, location, cause and number of
    deaths, and the dot color depends on the cause type.

    Parameters:
        time: two-element list [first year, last year], both inclusive.

    Returns:
        A Plotly figure on an OpenStreetMap background.
    """
    if time == [mintime,maxtime]:
        # Full range selected: show the dataset unfiltered.
        selected = data
    else:
        years = data['year']
        in_range = (years >= time[0]) & (years <= time[1])
        selected = data[in_range]

    world_map = px.scatter_mapbox(
        data_frame=selected,
        lat="Latitude",
        lon="Longitude",
        hover_name="Accident title",
        hover_data=["Date","Location","Cause","Deaths"],
        color="Causetype",
        size_max=10,
        zoom=0,
        height=1000,
    )
    world_map.update_layout(mapbox_style="open-street-map")
    return world_map

def _deaths_per_decade(frame):
    """Sum the deaths per decade and return the chart coordinates (x, y).

    Parameters:
        frame: DataFrame with a "year" and a "Deaths" column.

    Returns:
        (x, y): two lists of equal length in chronological order. x holds the
        year in which each decade ends (e.g. 1840 for 1830-1839) and y the
        total number of deaths in that decade.
    """
    # Accidents whose number of deaths is unknown carry this exact text in the
    # dataset; they count as 0 deaths. The conversion builds a new Series, so
    # the shared dataset is never modified.
    deaths = frame["Deaths"].map(
        lambda value: 0 if value == "Unknows number of deaths" else int(value)
    )
    decades = (frame["year"] // 10).astype(int)
    # Group by the raw decade values so the result does not depend on the index.
    totals = deaths.groupby(decades.to_numpy()).sum()

    x = [int(decade)*10+10 for decade in totals.index]
    y = [int(total) for total in totals.to_numpy()]
    return x, y


# Callback that keeps the deaths chart in sync with the time slider.
# Output: the figure of the 'Deaths_distribution' graph, showing how many
#         people died in train accidents per decade.
# Input:  the [first year, last year] pair selected on the 'time-slider'.
@app.callback(
    Output(component_id='Deaths_distribution', component_property='figure'),
    [
        Input(component_id='time-slider', component_property='value')
    ]
)
def update_death(time):
    """Return the line chart of deaths per decade for the selected years.

    Parameters:
        time: two-element list [first year, last year], both inclusive.

    Returns:
        A Plotly line figure with one point per decade.
    """
    mydata = data
    if time != [mintime,maxtime]:
        years = mydata['year']
        mydata = mydata[(years >= time[0]) & (years <= time[1])]

    x, y = _deaths_per_decade(mydata)

    # Plotting the graph
    fig=px.line(x=x,y=y,labels={'x': 'Years','y':'Number of Deaths'},height=400)
    return fig


# Count how many times each accident cause occurs, using get_dictionary() on
# the text file of causes. The result maps each cause to its count and is the
# input of the word cloud.
train_cause = get_dictionary("traintext.txt")

# Callback that draws the word cloud of accident causes.
# Output: the figure of the 'word-cloud' graph.
# Input:  the number of most frequent causes to show, chosen in the
#         'tags-number' dropdown.
@app.callback(
    Output(component_id='word-cloud', component_property='figure'),
    [
        Input(component_id='tags-number', component_property='value')
    ]
)
def display_wordcloud(number):
    """Return a word cloud of the `number` most frequent accident causes.

    Each cause is placed at a random position; its font size reflects how
    often it occurs and hovering it shows the actual count.

    Parameters:
        number: how many causes to show. None (dropdown cleared) or 0 gives
            an empty cloud.

    Returns:
        A Plotly figure with hidden axes.
    """
    layout = go.Layout({'xaxis': {'showgrid': False, 'showticklabels': False, 'zeroline': False},
                        'yaxis': {'showgrid': False, 'showticklabels': False, 'zeroline': False},
                        'height': 800})

    # Clearing the dropdown sends None; show an empty cloud instead of failing.
    if not number:
        return go.Figure(layout=layout)

    # a set of at most <number> tags with text, size and color information
    words, sizes, colors, counts = getTags(train_cause,number)

    # One random position per tag actually returned (there may be fewer
    # distinct causes than requested).
    x = random.choices(range(10*number), k=len(words))
    y = random.choices(range(10*number), k=len(words))

    trace = go.Scatter(x=x,
                       y=y,
                       mode='text',
                       hovertemplate=counts,
                       text=words,
                       marker={'opacity': 0.1},
                       textfont={'size': sizes, 'color': colors})

    return go.Figure(data=[trace], layout=layout)

# Start the Dash development server. It listens on port 8050 because that is
# the port the ngrok tunnel started earlier in this notebook forwards
# ('./ngrok http 8050'); with any other port the public URL cannot reach the app.
if __name__ == '__main__':
    app.run_server(debug=True, port=8050)