In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *

# Explicit schema handed to the CSV reader below. Each column is declared
# nullable; the date column is a timestamp, YEAR/MONTH/HOUR are integers and
# every other column is kept as a string.
crimes_schema = (
    StructType()
    .add("INCIDENT_NUMBER", StringType(), True)
    .add("OFFENSE_CODE", StringType(), True)
    .add("OFFENSE_CODE_GROUP", StringType(), True)
    .add("OFFENSE_DESCRIPTION", StringType(), True)
    .add("DISTRICT", StringType(), True)
    .add("REPORTING_AREA", StringType(), True)
    .add("SHOOTING", StringType(), True)
    .add("OCCURRED_ON_DATE", TimestampType(), True)
    .add("YEAR", IntegerType(), True)
    .add("MONTH", IntegerType(), True)
    .add("DAY_OF_WEEK", StringType(), True)
    .add("HOUR", IntegerType(), True)
    .add("UCR_PART", StringType(), True)
    .add("STREET", StringType(), True)
    .add("Lat", StringType(), True)
    .add("Long", StringType(), True)
    .add("Location", StringType(), True)
)

# Reuse the running Spark session if there is one, otherwise start a new one.
spark = SparkSession.builder.appName("Crimes in boston").getOrCreate()

# Read the CSV with a header row, applying the schema above instead of
# letting Spark infer the column types.
crimes = spark.read.csv(
    path="./data/BostenCrime.csv",
    schema=crimes_schema,
    header=True,
)

In [None]:
# Bare expression as the cell's last statement: the notebook displays the
# representation of the `crimes` DataFrame as the cell output.
crimes

In [None]:
# Print the distinct MONTH values in ascending order (at most 12 rows shown).
(
    crimes
    .select("MONTH")
    .distinct()
    .orderBy("MONTH")
    .show(n=12)
)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

def show_crime_per_hour_perMonth(hour):
    """Plot a bar chart of the number of crimes per month for one hour of the day.

    The rows of the module-level ``crimes`` DataFrame whose ``HOUR`` column
    equals ``hour`` are grouped by ``MONTH`` and counted; the twelve counts
    are drawn as a bar chart.

    Parameters
    ----------
    hour : int or str
        Hour to filter on. Anything accepted by ``int()`` is valid, so both
        ``12`` and ``'12'`` work.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.

    Raises
    ------
    ValueError, TypeError
        If ``hour`` cannot be converted to an integer.
    """
    # Normalising to int accepts both string and integer hours and keeps
    # arbitrary text out of the SQL filter expression.
    hour_value = int(hour)

    # All twelve months, January (1) through December (12).
    months = list(range(1, 13))

    result = crimes.filter("HOUR = {:d}".format(hour_value)).groupBy("MONTH").count()

    # Key the counts by month number instead of relying on row position:
    # a month without any matching crime (or a group with a null MONTH)
    # must not shift the remaining bars or cause an IndexError.
    count_by_month = {row["MONTH"]: row["count"] for row in result.collect()}
    y_axis = [count_by_month.get(month, 0) for month in months]

    fig1 = plt.figure(figsize=(7, 7))
    fig1.suptitle('Crimes per month per within a hour', fontsize=14)
    ax = fig1.add_subplot(111)
    ax.set_xlabel('month')
    ax.set_ylabel('Crime')
    ax.set_xticks(months)  # label every month, not only the default ticks
    ax.bar(months, y_axis, fc='darksalmon', align='center')
    plt.show()

In [None]:
from IPython.display import HTML

# Default hour as a string; matches the slider's initial value='12' below.
time = '12'

# HTML for the control: a range slider (0-23, step 1) whose onchange handler
# calls the JavaScript function set_vars() defined in `javascript`.
form = """
<div style='background-color:gainsboro; border:solid black; width:300px; padding:20px;'>
Hour : <input id='hour' type='range' min='0' max='23' step='1' value='12' onchange='set_vars()'/>
</div>"""

# JavaScript backing the slider. set_vars() reads the slider value, runs
# `time = "<value>"` in the kernel, then fetches the notebook cell at index
# kernelCell (hard-coded to 5), executes it and focuses it.
# NOTE(review): index 5 presumably refers to the cell that prints `time` and
# calls show_crime_per_hour_perMonth -- confirm; inserting or removing cells
# above it would make the index point at a different cell.
# NOTE(review): relies on the `IPython.notebook` JavaScript global, which is
# presumably only available in the classic Notebook front end -- confirm
# before using this in another front end.
javascript = """
<script type="text/Javascript">
    const kernelCell = 5;

    function set_vars(){
        var time = document.getElementById('hour').value;
        var kernel = IPython.notebook.kernel;
        
        kernel.execute('time = "' + time + '"');
        cell = IPython.notebook.get_cell(kernelCell);
        cell.execute();
        cell.focus_cell(kernelCell);
    }
</script>
"""

# Last expression of the cell: render the form together with its script.
HTML(form + javascript)

In [None]:
# Echo the currently selected hour, then draw the per-month chart for it.
print('Time: ', time, sep='')
show_crime_per_hour_perMonth(hour=time)