In [2]:
import re

import requests
from IPython.display import HTML
from transformers import pipeline


In [3]:
def main(paras):
    """Run the notebook workflow described by ``paras``.

    ``paras`` is a mapping that must provide ``SHOW_HEAT_MAP``,
    ``MASTODON_SIZE`` and ``MASTODON_PORT``. The heat map is rendered first
    when the flag compares equal to ``True``; the Mastodon scan always runs
    afterwards.
    """
    # Equality (not truthiness) is intentional: only values equal to True
    # enable the heat map.
    wants_heat_map = paras['SHOW_HEAT_MAP'] == True  # noqa: E712
    if wants_heat_map:
        heat_map()

    # These keys are looked up only after the optional heat map was shown.
    post_count = paras['MASTODON_SIZE']
    service_port = paras['MASTODON_PORT']
    mastodon(service_port, post_count)

    
    

In [4]:
from IPython.display import HTML

def heat_map():
    """Embed the Tableau Public 'CrimeByAreaMap' view in the cell output.

    Builds an HTML snippet consisting of a placeholder ``<div>``, a Tableau
    ``<object>`` with its ``<param>`` settings, and a small script that sizes
    the object (height = 0.8 x container width) and loads Tableau's
    ``viz_v1.js`` embed script. The snippet is rendered with
    ``display(HTML(...))``; nothing is returned.

    NOTE(review): ``display`` is not imported in the visible cells; it is
    presumably supplied by the IPython/Jupyter runtime -- confirm.
    """
    # Share link of the visualization; it already carries a query string.
    viz_url = "https://public.tableau.com/views/CrimeByAreaMap/CrimebyLocationMap?:language=en-US&publish=yes&:sid=&:display_count=n&:origin=viz_share_link"
    # NOTE(review): '{viz_url}.png' below appends '.png' AFTER the query
    # string of the share link -- confirm this actually resolves to a static
    # image for the <noscript> fallback and the 'static_image' param.
    # The container id 'vizContainer' is fixed, so rendering this snippet more
    # than once in the same page yields duplicate element ids.
    html_code = f'''
    <div class='tableauPlaceholder' id='vizContainer' style='position: relative; width: 100%; height: 800px;'>
        <noscript><a href='#'>
            <img alt='Tableau Visualization' src='{viz_url}.png' style='border: none' />
        </a></noscript>
        <object class='tableauViz' width='100%' height='800' style='display:none;'>
            <param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> 
            <param name='embed_code_version' value='3' /> 
            <param name='site_root' value='' />
            <param name='name' value='CrimeByAreaMap&#47;CrimebyLocationMap' />
            <param name='tabs' value='no' />
            <param name='toolbar' value='yes' />
            <param name='static_image' value='{viz_url}.png' /> 
            <param name='animate_transition' value='yes' />
            <param name='display_static_image' value='yes' />
            <param name='display_spinner' value='yes' />
            <param name='display_overlay' value='yes' />
            <param name='display_count' value='yes' />
            <param name='language' value='en-US' />
        </object>
    </div>
    <script type='text/javascript'>
        var divElement = document.getElementById('vizContainer');                    
        var vizElement = divElement.getElementsByTagName('object')[0];                    
        vizElement.style.width = '100%';                    
        vizElement.style.height = (divElement.offsetWidth * 0.8) + 'px';                    
        var scriptElement = document.createElement('script');                    
        scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    
        vizElement.parentNode.insertBefore(scriptElement, vizElement);                
    </script>
    '''
    # Render the snippet as rich HTML output of the current cell.
    display(HTML(html_code))

In [5]:
def get_mastodon_data(port, size, timeout=30):
    """Fetch Mastodon posts from the HTTP endpoint on localhost.

    Args:
        port: Port on ``localhost`` where the endpoint listens.
        size: Value placed in the ``/mastodon/{size}`` path segment.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            server.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
    """
    fission_url = f"http://localhost:{port}/mastodon/{size}"
    # verify=False is kept from the original call. The URL is plain HTTP, so
    # it only matters if the request ends up on HTTPS (e.g. via a redirect).
    response = requests.get(fission_url, verify=False, timeout=timeout)
    # Fail with a clear HTTP error instead of an obscure JSON decode error
    # when the endpoint returns an error page.
    response.raise_for_status()
    return response.json()

def check_crime_related(content, keywords):
    """Return True when ``content`` mentions any of ``keywords``.

    Matching is case-insensitive on both sides and anchored at the start of a
    word, so inflected forms still match ('crime' -> 'crimes') while keywords
    embedded in unrelated words do not ('rape' no longer matches 'grape').

    Args:
        content: Text to inspect. ``None`` or an empty string never matches.
        keywords: Iterable of keyword strings (single words or phrases).
            Empty keywords are ignored.

    Returns:
        bool: True if at least one keyword occurs at a word start.
    """
    if not content:
        return False

    escaped = [re.escape(keyword.lower()) for keyword in keywords if keyword]
    if not escaped:
        # An empty alternation would match everywhere; no keywords means no match.
        return False

    # \b anchors the keyword at a word boundary; the tail is left open so
    # plural/verb endings are still accepted.
    pattern = r'\b(?:' + '|'.join(escaped) + r')'
    return re.search(pattern, content.lower()) is not None

def mastodon(port, size):
    """Print fetched Mastodon posts that mention crime, with a sentiment label.

    Posts are fetched via ``get_mastodon_data``, filtered with
    ``check_crime_related`` against a fixed keyword list, and the matching
    posts are labelled by a DistilBERT SST-2 text-classification pipeline.

    Args:
        port: Port passed through to ``get_mastodon_data``.
        size: Number of posts to request; ``int(size)`` must be positive.

    Returns:
        None. All results are printed.
    """
    if int(size) <= 0:
        print('Please check size')
        return None

    data = get_mastodon_data(port, size)
    # Every item is expected to expose 'created_at' and 'content' under '_source'.
    filtered_data = [
        {'time': item['_source']['created_at'], 'content': item['_source']['content']}
        for item in data
    ]
    crime_keywords = [
        'crime', 'theft', 'murder', 'assault', 'fraud', 'robbery', 'burglary', 'arson',
        'kidnapping', 'drug', 'trafficking', 'violence', 'vandalism', 'smuggling', 'extortion',
        'blackmail', 'embezzlement', 'bribery', 'corruption', 'homicide', 'manslaughter', 'gang',
        'terrorist', 'terrorism', 'battery', 'abuse', 'harassment', 'molestation',
        'rape', 'domestic violence', 'cybercrime', 'identity theft', 'forgery', 'counterfeit',
        'human trafficking', 'organized crime', 'illegal', 'unlawfully', 'laundering',
        'money laundering', 'perjury', 'prostitution', 'racket', 'racketeering', 'sabotage',
        'scam', 'shoplifting', 'slander', 'stalking', 'swindle', 'threat',
        'trespassing', 'underworld', 'weapon', 'weapons', 'conspiracy',
    ]
    crime_related_records = [
        item for item in filtered_data
        if check_crime_related(item['content'], crime_keywords)
    ]

    # Decide before touching the model: loading the pipeline is expensive and
    # pointless when there is nothing to classify.
    if not crime_related_records:
        print('None of', size, 'are related to crime')
        return None

    classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
    # truncation=True keeps over-long posts within the model's input limit
    # instead of failing inside the tokenizer/model.
    results = classifier([item['content'] for item in crime_related_records], truncation=True)

    print(len(crime_related_records), 'of', size, 'may be related to crime', '\n')
    for record, result in zip(crime_related_records, results):
        # Assumes an ISO-8601 style timestamp ('YYYY-MM-DDTHH:MM:SS...') -- TODO confirm.
        # [11:19] covers the full HH:MM:SS; the previous [11:18] dropped the
        # last seconds digit.
        print('Time:', record['time'][0:10], record['time'][11:19])
        print('Content:', record['content'])
        print('Sentiment:', result['label'], '\n')
    
    

## Parameter Descriptions:

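- `MASTODON_SIZE`: the number of posts fetched from the real-time Mastodon server and scanned for posts that may be related to crime. Must be a positive number.
- `MASTODON_PORT`: the port on `localhost` where the Mastodon data endpoint is reachable.
- `SHOW_HEAT_MAP`: set to `True` if you want to show the heat map of crimes in Victoria.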

In [7]:
# Run configuration for this cell: heat map switched off, Mastodon endpoint
# on port 9000, and a size of -1.5.
paras = dict(
    SHOW_HEAT_MAP=False,
    MASTODON_SIZE=-1.5,
    MASTODON_PORT=9000,
)
main(paras)

Please check size
