# Fetch Dataset

This project uses the `chicago_crime` dataset to describe crime in the city of Chicago.

In [3]:
# import library
from google.cloud import bigquery

# Create the BigQuery client used for every API call in this notebook.
client = bigquery.Client()

# Build a reference to the public "chicago_crime" dataset. The reference is
# constructed directly (project first, then dataset id) because
# `Client.dataset()` is deprecated in google-cloud-bigquery.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "chicago_crime")

# API request - fetch the dataset the reference points to, then display it.
dataset = client.get_dataset(dataset_ref)
dataset

Using Kaggle's public dataset BigQuery integration.


Dataset(DatasetReference('bigquery-public-data', 'chicago_crime'))

In [4]:
# Inspect the dataset reference. Only the value of the last expression is
# rendered as the cell output; the first two lines evaluate without displaying.
dataset_ref # preview reference
dataset_ref.project # extract project attribute
dataset_ref.dataset_id # extract dataset id

'chicago_crime'

# Count the number of tables in the dataset

In [60]:
# Ask the API for the dataset's tables and keep them in a list so they can
# be indexed and iterated more than once.
client_table = client.list_tables(dataset)
tables = [*client_table]
tables

[<google.cloud.bigquery.table.TableListItem at 0x7ac194252bf0>]

In [6]:
# Peek at the attributes stored on the first table entry.
vars(tables[0])

{'_properties': {'kind': 'bigquery#table',
  'id': 'bigquery-public-data:chicago_crime.crime',
  'tableReference': {'projectId': 'bigquery-public-data',
   'datasetId': 'chicago_crime',
   'tableId': 'crime'},
  'type': 'TABLE',
  'creationTime': '1492025229548'}}

In [72]:
# Print the id of every table in the dataset.
# The loop variable is deliberately not called `table`: loop variables stay in
# the notebook namespace after the loop, and `table` is the name other cells
# use for the fetched Table object.
for table_item in tables:
    print(f'table_id: {table_item.table_id}')

table_id: crime


In [8]:
# Build a reference to the "crime" table from the dataset reference,
# then display the table id held by that reference.
table_ref = dataset_ref.table("crime")
table_ref.table_id

'crime'

In [9]:
# API request - fetch the table that `table_ref` points to, then display it.
table = client.get_table(table_ref)
table

Table(TableReference(DatasetReference('bigquery-public-data', 'chicago_crime'), 'crime'))

In [10]:
# Show the table's schema: one SchemaField entry per column.
# The leading values of each entry are (name, field_type, mode, description),
# where mode is e.g. NULLABLE or REQUIRED.
table.schema 

[SchemaField('unique_key', 'INTEGER', 'REQUIRED', None, (), None),
 SchemaField('case_number', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('date', 'TIMESTAMP', 'NULLABLE', None, (), None),
 SchemaField('block', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('iucr', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('primary_type', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('description', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('location_description', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('arrest', 'BOOLEAN', 'NULLABLE', None, (), None),
 SchemaField('domestic', 'BOOLEAN', 'NULLABLE', None, (), None),
 SchemaField('beat', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('district', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('ward', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('community_area', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('fbi_code', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('x_coord

In [65]:
# Count the tables in the Chicago Crime dataset.
# The listing is consumed inline instead of being bound to `table`, because
# `table` is the name the surrounding cells use for the fetched Table object.
num_tables = len(list(client.list_tables(dataset)))
print(f'number of tables : {num_tables}')

number of tables : 1


# Table schema

In [67]:
# Fetch the "crime" table and display its schema so that the columns whose
# field type is 'TIMESTAMP' can be picked out.
table = client.get_table(table_ref)
table.schema

[SchemaField('unique_key', 'INTEGER', 'REQUIRED', None, (), None),
 SchemaField('case_number', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('date', 'TIMESTAMP', 'NULLABLE', None, (), None),
 SchemaField('block', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('iucr', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('primary_type', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('description', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('location_description', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('arrest', 'BOOLEAN', 'NULLABLE', None, (), None),
 SchemaField('domestic', 'BOOLEAN', 'NULLABLE', None, (), None),
 SchemaField('beat', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('district', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('ward', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('community_area', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('fbi_code', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('x_coord

In [68]:
# Display the schema of `table` (one SchemaField entry per column).
table.schema

[SchemaField('unique_key', 'INTEGER', 'REQUIRED', None, (), None),
 SchemaField('case_number', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('date', 'TIMESTAMP', 'NULLABLE', None, (), None),
 SchemaField('block', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('iucr', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('primary_type', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('description', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('location_description', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('arrest', 'BOOLEAN', 'NULLABLE', None, (), None),
 SchemaField('domestic', 'BOOLEAN', 'NULLABLE', None, (), None),
 SchemaField('beat', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('district', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('ward', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('community_area', 'INTEGER', 'NULLABLE', None, (), None),
 SchemaField('fbi_code', 'STRING', 'NULLABLE', None, (), None),
 SchemaField('x_coord

In [70]:
# Number of fields in the table schema whose type is "TIMESTAMP".
# The schema fields are iterated directly (no positional indexing), and the
# generator keeps its loop variable out of the notebook namespace.
num_timestamp_fields = sum(
    1 for field in table.schema if field.field_type == 'TIMESTAMP'
)
print(f'# of fields having "TIMESTAMP" data type: {num_timestamp_fields}')

# of fields having "TIMESTAMP" data type: 2


In [45]:
# Construct a reference to the "crime" table from the dataset reference.
table_ref = dataset_ref.table("crime")

# API request - fetch the table the reference points to.
table = client.get_table(table_ref)

# Print information on all the columns in the "crime" table in the
# "chicago_crime" dataset. print() emits the whole schema list as plain text
# on a single line.
print(table.schema)

[SchemaField('unique_key', 'INTEGER', 'REQUIRED', None, (), None), SchemaField('case_number', 'STRING', 'NULLABLE', None, (), None), SchemaField('date', 'TIMESTAMP', 'NULLABLE', None, (), None), SchemaField('block', 'STRING', 'NULLABLE', None, (), None), SchemaField('iucr', 'STRING', 'NULLABLE', None, (), None), SchemaField('primary_type', 'STRING', 'NULLABLE', None, (), None), SchemaField('description', 'STRING', 'NULLABLE', None, (), None), SchemaField('location_description', 'STRING', 'NULLABLE', None, (), None), SchemaField('arrest', 'BOOLEAN', 'NULLABLE', None, (), None), SchemaField('domestic', 'BOOLEAN', 'NULLABLE', None, (), None), SchemaField('beat', 'INTEGER', 'NULLABLE', None, (), None), SchemaField('district', 'INTEGER', 'NULLABLE', None, (), None), SchemaField('ward', 'INTEGER', 'NULLABLE', None, (), None), SchemaField('community_area', 'INTEGER', 'NULLABLE', None, (), None), SchemaField('fbi_code', 'STRING', 'NULLABLE', None, (), None), SchemaField('x_coordinate', 'FLOAT'

In [54]:
# Request at most five rows of the table and show them as a DataFrame.
preview_rows = client.list_rows(table, max_results=5)
df = preview_rows.to_dataframe()
df

Unnamed: 0,unique_key,case_number,date,block,iucr,primary_type,description,location_description,arrest,domestic,...,ward,community_area,fbi_code,x_coordinate,y_coordinate,year,updated_on,latitude,longitude,location
0,10655248,HZ405004,2016-08-24 06:00:00+00:00,0000X W LAKE ST,263,CRIM SEXUAL ASSAULT,AGGRAVATED: KNIFE/CUT INSTR,ALLEY,False,False,...,42,32,2,1175790.0,1901739.0,2016,2018-02-10 03:50:01+00:00,41.885741,-87.629916,"(41.88574102, -87.62991594)"
1,2604346,HJ200978,2003-02-23 07:27:01+00:00,003XX S JEFFERSON ST,265,CRIM SEXUAL ASSAULT,AGGRAVATED: OTHER,ALLEY,False,False,...,2,28,2,1172382.0,1898709.0,2003,2018-02-28 03:56:25+00:00,41.877502,-87.64252,"(41.877502477, -87.642520263)"
2,8552135,HV226583,2012-03-30 01:00:00+00:00,0000X E WACKER DR,281,CRIM SEXUAL ASSAULT,NON-AGGRAVATED,HOTEL/MOTEL,False,False,...,42,32,2,1176956.0,1902517.0,2012,2016-02-04 06:33:39+00:00,41.88785,-87.625611,"(41.887849586, -87.625610638)"
3,7216226,HR631667,2009-11-07 06:48:00+00:00,005XX W MADISON ST,312,ROBBERY,ARMED:KNIFE/CUTTING INSTRUMENT,RESTAURANT,False,False,...,42,28,3,1172605.0,1900295.0,2009,2018-02-28 03:56:25+00:00,41.88185,-87.641655,"(41.881849637, -87.641654542)"
4,9375205,HW518668,2013-11-03 02:52:00+00:00,0000X W WASHINGTON ST,312,ROBBERY,ARMED:KNIFE/CUTTING INSTRUMENT,SIDEWALK,False,False,...,42,32,3,1175800.0,1900817.0,2013,2018-02-10 03:50:01+00:00,41.883211,-87.629907,"(41.883210775, -87.629906972)"


# Features for Crime map

Features in the `crime` table to plot crimes on a map

In [71]:
# Columns of the `crime` table chosen for placing each crime on a map.
fields_for_plotting = ['latitude', 'longitude'] 