In [1]:
# @title Setup
from google.colab import auth
from google.cloud import bigquery
from google.colab import data_table

# Project and location of the BigQuery job whose results this notebook explores.
project = 'bj44966-je38746n' # Project ID inserted based on the query results selected to explore
location = 'us-south1' # Location inserted based on the query results selected to explore

# Authenticate before building the client: the client resolves default
# credentials when it is constructed, so the user's credentials should already
# be in place on a fresh kernel.
auth.authenticate_user()

# Client bound to the job's project and location; the cells below reuse it.
client = bigquery.Client(project=project, location=location)

# Switch on Colab's DataFrame formatter for displayed pandas DataFrames.
data_table.enable_dataframe_formatter()

## Reference SQL syntax from the original job
Use the ```jobs.query```
[method](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query) to
return the SQL syntax from the job. This can be copied from the output cell
below to edit the query now or in the future. Alternatively, you can use
[this link](https://console.cloud.google.com/bigquery?j=bj44966-je38746n:us-south1:bquxjob_663246a1_18c5716093a)
back to BigQuery to edit the query within the BigQuery user interface.

In [2]:
# Look up the earlier BigQuery job by its ID and print the SQL text it ran.
# The job ID was inserted based on the query results selected to explore.

job_id = 'bquxjob_663246a1_18c5716093a'
job = client.get_job(job_id)
print(job.query)

SELECT * FROM `bj44966-je38746n.Heytaxi_bj44966n_je38746n.Table1` LIMIT 1000


## Result set loaded from BigQuery job as a DataFrame
Query results are referenced by the ID of the job that was run in BigQuery, so
the query does not need to be re-run to explore the results. The ```to_dataframe```
[method](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.QueryJob.html#google.cloud.bigquery.job.QueryJob.to_dataframe)
downloads the results to a Pandas DataFrame by using the BigQuery Storage API.

You can edit the query syntax in the BigQuery SQL editor or in the
```Optional:``` sections below.

In [3]:
# Fetch the earlier BigQuery job by its ID and load its result set into a
# pandas DataFrame. The job ID was inserted based on the query results
# selected to explore.

job_id = 'bquxjob_663246a1_18c5716093a'
job = client.get_job(job_id)
results = job.to_dataframe()

# Leave the DataFrame as the last expression so the notebook displays it.
results

Unnamed: 0,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,rate_code,store_and_fwd_flag,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,imp_surcharge,total_amount,pickup_location_id,dropoff_location_id
0,2,0018-12-04 22:20:00+00:00,0018-12-04 22:20:00+00:00,1,0.00,1,False,1,2.5,0.5,0.5,0.76,0.00,0.3,4.56,239,239
1,2,0018-08-04 01:12:00+00:00,0018-08-04 01:12:00+00:00,1,0.00,1,False,1,2.5,0.5,0.5,0.76,0.00,0.3,4.56,90,90
2,2,0018-05-09 07:18:00+00:00,0018-05-09 07:18:00+00:00,1,0.01,1,False,1,2.5,0.0,0.5,0.33,0.00,0.3,5.58,161,161
3,2,0018-05-09 08:01:00+00:00,0018-05-09 08:01:00+00:00,1,0.00,1,False,1,2.5,0.0,0.5,0.82,0.00,0.3,4.12,43,43
4,1,0018-05-30 11:04:00+00:00,0018-05-30 11:04:00+00:00,1,0.10,1,False,1,2.5,0.0,0.5,0.65,0.00,0.3,3.95,164,164
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1,0018-06-19 23:14:00+00:00,0018-06-19 23:45:00+00:00,1,8.70,1,False,1,29.5,0.5,0.5,6.15,0.00,0.3,36.95,170,116
996,1,0018-07-23 11:18:00+00:00,0018-07-23 11:43:00+00:00,1,9.10,1,False,1,29.5,0.0,0.5,6.06,0.00,0.3,36.36,140,138
997,1,0018-05-19 11:32:00+00:00,0018-05-19 12:15:00+00:00,1,6.50,1,False,1,29.5,0.0,0.5,6.06,0.00,0.3,36.36,40,230
998,1,0018-01-25 04:22:00+00:00,0018-01-25 04:43:00+00:00,1,10.30,1,False,1,29.5,0.5,0.5,7.30,5.76,0.3,43.86,68,138


## Show descriptive statistics using describe()
Use the ```pandas DataFrame.describe()```
[method](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.describe.html)
to generate descriptive statistics. Descriptive statistics include those that
summarize the central tendency, dispersion and shape of a dataset’s
distribution, excluding ```NaN``` values. You may also use other Python methods
to interact with your data.

In [4]:
# Summarise the numeric columns of `results`: count, mean, std, min,
# quartiles (25%/50%/75%) and max for each.
results.describe()

Unnamed: 0,vendor_id,passenger_count,trip_distance,rate_code,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,imp_surcharge,total_amount,pickup_location_id,dropoff_location_id
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,1.6,1.49,8.18459,1.112,1.0,30.1645,0.33,0.4855,6.76814,1.97087,0.2997,40.03041,156.175,145.723
std,0.514066,1.101414,4.520351,0.601513,0.0,18.726796,0.372476,0.083945,3.936242,3.209026,0.009487,23.396855,62.727275,77.55067
min,1.0,0.0,0.0,1.0,1.0,2.5,0.0,0.0,0.01,0.0,0.0,3.95,4.0,1.0
25%,1.0,1.0,5.9,1.0,1.0,23.5,0.0,0.5,4.86,0.0,0.3,30.2775,132.0,87.0
50%,2.0,1.0,7.8,1.0,1.0,26.5,0.0,0.5,6.0,0.0,0.3,35.105,138.0,142.0
75%,2.0,2.0,9.7,1.0,1.0,32.0,0.5,0.5,8.0,5.76,0.3,45.315,229.0,230.0
max,4.0,6.0,65.1,5.0,1.0,390.0,1.0,0.5,60.0,22.5,0.3,450.3,265.0,265.0
