Permalink
Cannot retrieve contributors at this time
Join GitHub today
GitHub is home to over 50 million developers working together to host and review code, manage projects, and build software together.
-- Session configuration for this script.
-- Run the statements below on the MapReduce execution engine.
set hive.execution.engine=mr;
-- R is the sphere radius that scales the direct_distance expression further down.
-- NOTE(review): 3959 looks like the mean Earth radius in miles - confirm the intended unit.
set R = 3959;
-- pi is substituted as text, so each reference expands to radians(180), which evaluates to pi.
set pi=radians(180);
-- Target table for the trip/fare join performed by the insert statement in this script.
-- Stored as comma-delimited text, one record per line.
-- Columns derived by that insert rather than copied from a source table:
--   pickup_hour, pickup_week, weekday : computed from pickup_datetime
--   direct_distance                   : computed from the pickup and dropoff coordinates
--   tipped, tip_class                 : computed from tip_amount
-- pickup_hour, pickup_week, tipped and tip_class are declared string although the
-- insert produces integer values for them, so they are stored as their text form.
create table if not exists nyctaxidb.nyctaxi_joined
(
    medallion           string,
    hack_license        string,
    vendor_id           string,
    rate_code           string,
    store_and_fwd_flag  string,
    pickup_datetime     string,
    dropoff_datetime    string,
    pickup_hour         string,
    pickup_week         string,
    weekday             string,
    passenger_count     int,
    trip_time_in_secs   double,
    trip_distance       double,
    pickup_longitude    double,
    pickup_latitude     double,
    dropoff_longitude   double,
    dropoff_latitude    double,
    direct_distance     double,
    payment_type        string,
    fare_amount         double,
    surcharge           double,
    mta_tax             double,
    tip_amount          double,
    tolls_amount        double,
    total_amount        double,
    tipped              string,
    tip_class           string
)
row format delimited fields terminated by ','
lines terminated by '\n'
stored as textfile;
-- Fill nyctaxidb.nyctaxi_joined with the join of trip rows and fare rows.
--   t : rows of nyctaxidb.trip whose pickup and dropoff coordinates lie within
--       latitude 30..90 and longitude -90..-30, plus a computed direct_distance.
--   f : the payment columns of nyctaxidb.fare.
-- Rows are matched on medallion, hack_license and pickup_datetime. It is an inner
-- join, so a trip without a matching fare row is dropped.
--
-- direct_distance is the haversine great-circle distance 2*R*asin(sqrt(a)), expressed
-- in the unit of the hiveconf variable R. The asin form yields 0 when pickup and
-- dropoff coincide. A form that divides by sqrt(a) yields NULL there, because Hive
-- returns NULL for division by zero.
-- The hiveconf variables R and pi must be set earlier in the session.
--
-- tip_class buckets: 0 when tip_amount is 0, 1 for (0,5], 2 for (5,10], 3 for (10,20],
-- and 4 for everything else. Negative or NULL tip_amount also falls through to 4.
--
-- Optional down-sampling: add rand() as sample_key to the trip subquery and append
-- where t.sample_key<=0.01 after the join condition.
insert overwrite table nyctaxidb.nyctaxi_joined
select
    t.medallion,
    t.hack_license,
    t.vendor_id,
    t.rate_code,
    t.store_and_fwd_flag,
    t.pickup_datetime,
    t.dropoff_datetime,
    hour(t.pickup_datetime) as pickup_hour,
    weekofyear(t.pickup_datetime) as pickup_week,
    from_unixtime(unix_timestamp(t.pickup_datetime, 'yyyy-MM-dd HH:mm:ss'), 'u') as weekday,
    t.passenger_count,
    t.trip_time_in_secs,
    t.trip_distance,
    t.pickup_longitude,
    t.pickup_latitude,
    t.dropoff_longitude,
    t.dropoff_latitude,
    t.direct_distance,
    f.payment_type,
    f.fare_amount,
    f.surcharge,
    f.mta_tax,
    f.tip_amount,
    f.tolls_amount,
    f.total_amount,
    case when f.tip_amount > 0 then 1 else 0 end as tipped,
    case
        when f.tip_amount = 0 then 0
        when f.tip_amount > 0 and f.tip_amount <= 5 then 1
        when f.tip_amount > 5 and f.tip_amount <= 10 then 2
        when f.tip_amount > 10 and f.tip_amount <= 20 then 3
        else 4
    end as tip_class
from
(
    select
        medallion,
        hack_license,
        vendor_id,
        rate_code,
        store_and_fwd_flag,
        pickup_datetime,
        dropoff_datetime,
        passenger_count,
        trip_time_in_secs,
        trip_distance,
        pickup_longitude,
        pickup_latitude,
        dropoff_longitude,
        dropoff_latitude,
        ${hiveconf:R}*2*asin(sqrt(
            pow(sin((dropoff_latitude-pickup_latitude)*${hiveconf:pi}/180/2),2)
            + cos(pickup_latitude*${hiveconf:pi}/180)
              *cos(dropoff_latitude*${hiveconf:pi}/180)
              *pow(sin((dropoff_longitude-pickup_longitude)*${hiveconf:pi}/180/2),2)
        )) as direct_distance
    from nyctaxidb.trip
    where pickup_latitude between 30 and 90
      and pickup_longitude between -90 and -30
      and dropoff_latitude between 30 and 90
      and dropoff_longitude between -90 and -30
) t
inner join
(
    select
        medallion,
        hack_license,
        pickup_datetime,
        payment_type,
        fare_amount,
        surcharge,
        mta_tax,
        tip_amount,
        tolls_amount,
        total_amount
    from nyctaxidb.fare
) f
    on  t.medallion = f.medallion
    and t.hack_license = f.hack_license
    and t.pickup_datetime = f.pickup_datetime;