In [1]:
import findspark
findspark.init()

from pyspark.sql.functions import isnan, when, count, col

In [2]:
from pyspark.sql import SparkSession
from pyspark import SparkConf, SparkContext

import pandas as pd
pd.options.display.max_rows = 100

# Configure Spark session

In [3]:
def run():
    from pyspark import SparkContext, SparkConf

    conf = SparkConf()
    conf.setAppName('cars')
    conf.set("spark.driver.memory", "5g")
    conf.set("spark.driver.memoryOverhead", "0.10")
    sc = SparkContext(conf=conf)
    
    return sc

spark = SparkSession(run())

# turn on a dataframe view
spark.conf.set('spark.sql.repl.eagerEval.enabled', True)

In [None]:
# Summary function
def user_activity_workout_summarize(df):
    user_count = format(df.select('userId').distinct().count(), ',d')
    workout_count = format(df.select('id').distinct().count(), ',d')
    activity_count = str(df.select('sport').distinct().count())
    seqOp = (lambda x,y: x+y)
    sum_temp = df.rdd.map(lambda x: len(x.timestamp)).aggregate(0, seqOp, seqOp)
    total_records_count = format(sum_temp, ',d')
    columns=['Users count', 'Activity types count','Workouts count', 'Total records count']
    data = [[user_count], [activity_count], [workout_count], [total_records_count]]
    sum_dict = {column: data[i] for i, column in enumerate(columns)}
    sum_df = pd.DataFrame.from_dict(sum_dict)[columns]
    gender_user_count = df.select('gender','userId').distinct().groupBy('gender').count().toPandas()
    gender_activities_count = df.groupBy('gender').count().toPandas()
    gender_user_activity_count = gender_user_count.join(
        gender_activities_count.set_index('gender'), on='gender'
        , how='inner', lsuffix='_gu'
    )
    gender_user_activity_count.columns = ['Gender', '# of users', 'Activities (workouts) count']
    
    return sum_df, gender_user_activity_count

sum_dfs = user_activity_workout_summarize(df)
print('\nOverall data set summary on users, activities(workouts) and number of fitness records:')
sum_dfs[0]
print('\nSummarize on genders:')
sum_dfs[1]

# Load data

In [9]:
df = spark.read.options(delimiter = '.').csv(path = "used_cars_data.csv", sep = ',',  header = True)

In [10]:
display(df)

vin,back_legroom,bed,bed_height,bed_length,body_type,cabin,city,city_fuel_economy,combine_fuel_economy,daysonmarket,dealer_zip,description,engine_cylinders,engine_displacement,engine_type,exterior_color,fleet,frame_damaged,franchise_dealer,franchise_make,front_legroom,fuel_tank_volume,fuel_type,has_accidents,height,highway_fuel_economy,horsepower,interior_color,isCab,is_certified,is_cpo,is_new,is_oemcpo,latitude,length,listed_date,listing_color,listing_id,longitude,main_picture_url,major_options,make_name,maximum_seating,mileage,model_name,owner_count,power,price,salvage,savings_amount,seller_rating,sp_id,sp_name,theft_title,torque,transmission,transmission_display,trimId,trim_name,vehicle_damage_category,wheel_system,wheel_system_display,wheelbase,width,year
ZACNJABB5KPJ92081,35.1 in,,,,SUV / Crossover,,Bayamon,,,522,960,[!@@Additional In...,I4,1300.0,I4,Solar Yellow,,,True,Jeep,41.2 in,12.7 gal,Gasoline,,66.5 in,,177.0,Black,,,,True,,18.3988,166.6 in,2019-04-06,YELLOW,237132766,-66.1582,https://static.ca...,['Quick Order Pac...,Jeep,5 seats,7.0,Renegade,,"177 hp @ 5,750 RPM",23141.0,,0,2.8,370599,Flagship Chrysler,,"200 lb-ft @ 1,750...",A,9-Speed Automatic...,t83804,Latitude FWD,,FWD,Front-Wheel Drive,101.2 in,79.6 in,2019
SALCJ2FX1LH858117,38.1 in,,,,SUV / Crossover,,San Juan,,,207,922,[!@@Additional In...,I4,2000.0,I4,Narvik Black,,,True,Land Rover,39.1 in,17.7 gal,Gasoline,,68 in,,246.0,Black (Ebony),,,,True,,18.4439,181 in,2020-02-15,BLACK,265946296,-66.0785,https://static.ca...,['Adaptive Cruise...,Land Rover,7 seats,8.0,Discovery Sport,,"246 hp @ 5,500 RPM",46500.0,,0,3.0,389227,Land Rover San Juan,,"269 lb-ft @ 1,400...",A,9-Speed Automatic...,t86759,S AWD,,AWD,All-Wheel Drive,107.9 in,85.6 in,2020
JF1VA2M67G9829723,35.4 in,,,,Sedan,,Guaynabo,17.0,,1233,969,,H4,2500.0,H4,,False,False,True,FIAT,43.3 in,15.9 gal,Gasoline,False,58.1 in,23.0,305.0,,False,,,False,,18.3467,180.9 in,2017-04-25,UNKNOWN,173473508,-66.1098,,"['Alloy Wheels', ...",Subaru,5 seats,,WRX STI,3.0,"305 hp @ 6,000 RPM",46995.0,False,0,,370467,FIAT de San Juan,False,"290 lb-ft @ 4,000...",M,6-Speed Manual,t58994,Base,,AWD,All-Wheel Drive,104.3 in,78.9 in,2016
SALRR2RV0L2433391,37.6 in,,,,SUV / Crossover,,San Juan,,,196,922,[!@@Additional In...,V6,3000.0,V6,Eiger Gray,,,True,Land Rover,39 in,23.5 gal,Gasoline,,73 in,,340.0,Gray (Ebony/Ebony...,,,,True,,18.4439,195.1 in,2020-02-26,GRAY,266911050,-66.0785,https://static.ca...,,Land Rover,7 seats,11.0,Discovery,,"340 hp @ 6,500 RPM",67430.0,,0,3.0,389227,Land Rover San Juan,,"332 lb-ft @ 3,500...",A,8-Speed Automatic...,t86074,V6 HSE AWD,,AWD,All-Wheel Drive,115 in,87.4 in,2020
SALCJ2FXXLH862327,38.1 in,,,,SUV / Crossover,,San Juan,,,137,922,[!@@Additional In...,I4,2000.0,I4,Narvik Black,,,True,Land Rover,39.1 in,17.7 gal,Gasoline,,68 in,,246.0,Black (Ebony),,,,True,,18.4439,181 in,2020-04-25,BLACK,270957414,-66.0785,https://static.ca...,['Adaptive Cruise...,Land Rover,7 seats,7.0,Discovery Sport,,"246 hp @ 5,500 RPM",48880.0,,0,3.0,389227,Land Rover San Juan,,"269 lb-ft @ 1,400...",A,9-Speed Automatic...,t86759,S AWD,,AWD,All-Wheel Drive,107.9 in,85.6 in,2020
SALYK2EX1LA261711,37.1 in,,,,SUV / Crossover,,San Juan,,,242,922,[!@@Additional In...,I4,2000.0,I4,Kaikoura Stone,False,False,True,Land Rover,40.2 in,16.6 gal,Gasoline,False,66.3 in,,247.0,Brown (Ebony / Eb...,False,,,True,,18.4439,188.9 in,2020-01-11,UNKNOWN,262940541,-66.0785,https://static.ca...,"['Leather Seats',...",Land Rover,5 seats,12.0,Range Rover Velar,,"247 hp @ 5,500 RPM",66903.0,False,0,3.0,389227,Land Rover San Juan,False,"269 lb-ft @ 1,200...",A,8-Speed Automatic...,t85614,P250 R-Dynamic S AWD,,AWD,All-Wheel Drive,113.1 in,84.4 in,2020
3MZBPABL6KM107908,35.1 in,,,,Sedan,,Bayamon,,,447,960,[!@@Additional In...,I4,2500.0,I4,SONIC SILVER,,,True,Jeep,42.3 in,13.2 gal,Gasoline,,56.9 in,,186.0,Black,,,,True,,18.3988,183.5 in,2019-06-20,SILVER,244110426,-66.1582,https://static.ca...,"['Alloy Wheels', ...",Mazda,5 seats,14.0,MAZDA3,,"186 hp @ 6,000 RPM",23695.0,,0,2.8,370599,Flagship Chrysler,,"186 lb-ft @ 4,000...",A,6-Speed Automatic...,t85256,Sedan FWD,,FWD,Front-Wheel Drive,107.3 in,70.7 in,2019
SALYK2EX5LA275434,37.1 in,,,,SUV / Crossover,,San Juan,,,70,922,[!@@Additional In...,I4,2000.0,I4,Fuji White,,,True,Land Rover,40.2 in,16.6 gal,Gasoline,,66.3 in,,247.0,White (Eclipse / ...,,,,True,,18.4439,188.9 in,2020-07-01,WHITE,275458784,-66.0785,https://static.ca...,['Adaptive Cruise...,Land Rover,5 seats,11.0,Range Rover Velar,,"247 hp @ 5,500 RPM",68520.0,,0,3.0,389227,Land Rover San Juan,,"269 lb-ft @ 1,200...",A,8-Speed Automatic...,t85614,P250 R-Dynamic S AWD,,AWD,All-Wheel Drive,113.1 in,84.4 in,2020
SALCJ2FX6LH858128,38.1 in,,,,SUV / Crossover,,San Juan,,,196,922,[!@@Additional In...,I4,2000.0,I4,Eiger Gray,,,True,Land Rover,39.1 in,17.7 gal,Gasoline,,68 in,,246.0,Black (Ebony),,,,True,,18.4439,181 in,2020-02-26,GRAY,266911040,-66.0785,https://static.ca...,['Navigation Syst...,Land Rover,7 seats,8.0,Discovery Sport,,"246 hp @ 5,500 RPM",51245.0,,0,3.0,389227,Land Rover San Juan,,"269 lb-ft @ 1,400...",A,9-Speed Automatic...,t86759,S AWD,,AWD,All-Wheel Drive,107.9 in,85.6 in,2020
SALZL2GX4LH007593,33.8 in,,,,SUV / Crossover,,San Juan,,,510,922,[!@@Additional In...,I4,2000.0,I4,Blanco,False,False,True,Land Rover,40 in,17.7 gal,Gasoline,False,64.9 in,,296.0,Eclipse/Ebony,False,,,False,,18.4439,172.1 in,2019-04-18,WHITE,238225156,-66.0785,https://static.ca...,"['Leather Seats',...",Land Rover,5 seats,254.0,Range Rover Evoque,,"296 hp @ 5,500 RPM",84399.0,False,0,3.0,389227,Land Rover San Juan,False,"295 lb-ft @ 1,600...",A,9-Speed Automatic...,t85531,P300 R-Dynamic SE...,,AWD,All-Wheel Drive,105.6 in,82.7 in,2020


In [11]:
# save to parquet
df.write.parquet("/tmp/parquet/cars.parquet")

In [4]:
# read from parquet
parDF=spark.read.parquet("/tmp/parquet/cars.parquet")

# Data overview

In [5]:
# shape
print(format(parDF.count(), ',d'), '\ncol cnt:', len(parDF.columns))

3,000,507 
col cnt: 66


In [6]:
# data types
print('Columns & datatypes:')
pd.DataFrame(parDF.dtypes, columns =['col_name', 'data_type'])

Columns & datatypes:


Unnamed: 0,col_name,data_type
0,vin,string
1,back_legroom,string
2,bed,string
3,bed_height,string
4,bed_length,string
5,body_type,string
6,cabin,string
7,city,string
8,city_fuel_economy,string
9,combine_fuel_economy,string


###### get count from descriptive stats

In [7]:
# descriptive 
descr = parDF.describe().toPandas()
descr

Unnamed: 0,summary,vin,back_legroom,bed,bed_height,bed_length,body_type,cabin,city,city_fuel_economy,...,transmission,transmission_display,trimId,trim_name,vehicle_damage_category,wheel_system,wheel_system_display,wheelbase,width,year
0,count,3000507,2840909,19679,429196,429191,2986589,63622,3000133,2508829,...,2918550,2912147,2880702,2884802,1158444,2853203,2848662,2828164,2830498,2946381
1,mean,Infinity,,4081.25,5166.666666666667,63740.0,5000.0,36161.2,23104.127042857144,22.706573085279945,...,6938472.700991163,9321036.134121323,1.1656370838791057E7,1.1037576677955545E7,1.0958303484245954E7,1.267889072667737E7,1.3396662595330501E7,1.4679855323887112E7,1.3845013514002014E7,342090.8502279081
2,stddev,,,1653.3677751788923,1474.2229591663988,109272.00131781244,0.0,47620.57877850709,53347.21956367153,16.079865342342412,...,4.298426253523661E7,4.970049252064884E7,5.538545201250243E7,5.397217480572898E7,5.374599443231616E7,5.7648088615293436E7,5.918315832965848E7,6.178457258018881E7,6.009541186023207E7,9667363.89268572
3,min,Heated Windshield Washer nozzles,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRU...,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRU...,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRU...,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRU...,'Blind Spot Monitoring',WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRU...,& belt. Recently changed the oil,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRU...,...,,,,,,,,,,
4,max,type: SUV,V8 Flex Fuel Vehicle,Volkswagen,V8 Flex Fuel Vehicle,White,White,V8,private seller,V8,...,~BACK UP CAMERA~,~~ VERY LOW MILES ~~,~GM CERTIFIED,~~~ Call (502) 695-9050 to check availability ...,~~~ Call (502) 695-9050 to check availability ...,~PUSH BUTTON START~,~~~ Call (502) 695-9050 to check availability ...,~PUSH BUTTON START~,~GM CERTIFIED,~~~ Call (502) 695-9050 to check availability ...


In [8]:
# get count (descending order)
descr_trans = descr.T
descr_trans.columns = descr_trans.iloc[0]

descr_trans = descr_trans.drop(descr_trans.index[0])
descr_trans['count'] = descr_trans['count'].astype('int64') 
descr_trans['count'].sort_values(ascending=False)

vin                        3000507
city                       3000133
daysonmarket               3000126
dealer_zip                 3000126
exterior_color             2996558
body_type                  2986589
franchise_dealer           2985724
model_name                 2975734
interior_color             2973804
make_name                  2972421
price                      2969827
sp_name                    2967403
longitude                  2964886
sp_id                      2963782
savings_amount             2959047
listing_id                 2954554
year                       2946381
listing_color              2939284
is_new                     2935768
seller_rating              2931478
mileage                    2929166
engine_cylinders           2928909
engine_type                2926986
listed_date                2925745
description                2922226
fuel_type                  2921440
transmission               2918550
transmission_display       2912147
latitude            

In [9]:
parDF.where(col('city').isNotNull()).count()

3000133

###### get top n values by count from every column

In [46]:
descr_trans

summary,count,mean,stddev,min,max,top_n_values
vin,3000507,inf,,Heated Windshield Washer nozzles,type: SUV,"{' Sales Department open until 7 PM on Mondays. Doing business with Fairmont Ford will put a smile on your face. Established in 1909': 15, '5FNYF6H94MB000906': 2, '2HKRW2H52LH679102': 2, '2FMPK4K97LBB07668': 2, '1N6AD0EV6KN713209': 2, '1FADP3F29FL349680': 2, '5N1AZ2CSXLN157394': 2, '5XYPG4A31KG501728': 2, '1N4BL4DV0LC262865': 2, '5FNYF6H10MB018426': 2}"
back_legroom,2840909,,,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRUCKS **COMING FROM OUT OF TOWN? We are 30 Miles from Ontario CA International Airport **OUT OF STATE ? NEED YOUR VEHICLE TRANSPORTED TO YOU? No problem visit our website for a free quote. Price excludes government fees and taxes,V8 Flex Fuel Vehicle,"{None: 159598, '38.3 in': 113891, '43.6 in': 102491, '37.4 in': 87363, '--': 83458, '35.7 in': 77162, '39 in': 76100, '40.4 in': 72784, '38 in': 66933, '38.6 in': 63955}"
bed,19679,4081.25,1653.3677751788923,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRUCKS **COMING FROM OUT OF TOWN? We are 30 Miles from Ontario CA International Airport **OUT OF STATE ? NEED YOUR VEHICLE TRANSPORTED TO YOU? No problem visit our website for a free quote. Price excludes government fees and taxes,Volkswagen,"{None: 2980828, 'Short': 11822, 'Long': 4878, 'Regular': 2868, ' any finance charges': 14, ' family and neighbors. Come join the Fairmont Ford Family today![!@@Additional Info@@!]Engine': 4, ' family and neighbors. Come join the Fairmont Ford Family today![!@@Additional Info@@!]3.65 Axle Ratio|GVWR: 6': 4, ' 'Navigation System'': 3, '4000.0': 2, ' 'Bluetooth'': 2}"
bed_height,429196,5166.666666666667,1474.2229591663988,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRUCKS **COMING FROM OUT OF TOWN? We are 30 Miles from Ontario CA International Airport **OUT OF STATE ? NEED YOUR VEHICLE TRANSPORTED TO YOU? No problem visit our website for a free quote. Price excludes government fees and taxes,V8 Flex Fuel Vehicle,"{None: 2571311, '--': 429098, ' any dealer document preparation charge': 14, 'V8': 6, 'I4': 5, 'V6': 4, '160 lbs|Electronic Transfer Case|Automatic Full-Time Four-Wheel Drive|72-Amp/Hr 650CCA Maintenance-Free Battery w/Run Down Protection|200 Amp Alternator|Towing Equipment -inc: Trailer Sway Control|Gas-Pressurized Shock Absorbers|Front And Rear Anti-Roll Bars|Electric Power-Assist Speed-Sensing Steering|18.6 Gal. Fuel Tank|Quasi-Dual Stainless Steel Exhaust w/Chrome Tailpipe Finisher|Auto Locking Hubs|Strut Front Suspension w/Coil Springs|Multi-Link Rear Suspension w/Coil Springs|4-Wheel Disc Brakes w/4-Wheel ABS': 4, ' 3.6L V6': 3, 'I6 Diesel': 2, ' MN': 2}"
bed_length,429191,63740.0,109272.00131781244,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRUCKS **COMING FROM OUT OF TOWN? We are 30 Miles from Ontario CA International Airport **OUT OF STATE ? NEED YOUR VEHICLE TRANSPORTED TO YOU? No problem visit our website for a free quote. Price excludes government fees and taxes,White,"{None: 2571316, '67.1 in': 83655, '67.4 in': 50872, '69.9 in': 44920, '78.9 in': 38342, '76.3 in': 32738, '69.3 in': 20998, '61.7 in': 12700, '60.3 in': 11807, '98.3 in': 11799}"
body_type,2986589,5000.0,0.0,'Blind Spot Monitoring',White,"{'SUV / Crossover': 1416402, 'Sedan': 742036, 'Pickup Truck': 474595, 'Hatchback': 88374, 'Minivan': 79802, 'Coupe': 71607, 'Van': 47166, 'Wagon': 40505, 'Convertible': 26010, None: 13918}"
cabin,63622,36161.2,47620.57877850709,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRUCKS **COMING FROM OUT OF TOWN? We are 30 Miles from Ontario CA International Airport **OUT OF STATE ? NEED YOUR VEHICLE TRANSPORTED TO YOU? No problem visit our website for a free quote. Price excludes government fees and taxes,V8,"{None: 2936885, 'Crew Cab': 51083, 'Extended Cab': 7960, 'Regular Cab': 2966, 'Large Crew Cab': 1524, 'False': 20, ' Hill Descent Control and Hill Hold Control|Tires: P245/60R18 AS BSW|Steel Spare Wheel|Compact Spare Tire Mounted Inside Under Cargo|Clearcoat Paint|Body-Colored Front Bumper w/Black Rub Strip/Fascia Accent and Metal-Look Bumper Insert|Body-Colored Rear Bumper w/Black Rub Strip/Fascia Accent and Metal-Look Bumper Insert|Black Side Windows Trim and Black Front Windshield Trim|Chrome Bodyside Insert': 4, ' 271 lb-ft of torque @ 5000 rpm [365.9 N-m]) (Available on TND26 FWD model. Standard on TNL26 AWD model.)|Axle': 3, '400 lbs|50 State Emissions|Manual Transfer Case|Part-Time Four-Wheel Drive|Engine oil cooler|600CCA Maintenance-Free Battery|160 Amp Alternator|Towing Equipment -inc: Trailer Sway Control|2 Skid Plates|1000# Maximum Payload|Front And Rear Anti-Roll Bars|Gas-Pressurized Shock Absorbers|Hydraulic Power-Assist Steering|22.5 Gal. Fuel Tank|Single Stainless Steel Exhaust|Auto Locking Hubs|Leading Link Front Suspension w/Coil Springs|Trailing Arm Rear Suspension w/Coil Springs|4-Wheel Disc Brakes w/4-Wheel ABS': 2, ' Hill Descent Control and Hill Hold Control|Steel Spare Wheel|Full-Size Spare Tire Stored Underbody w/Crankdown|Clearcoat Paint|Body-Colored Rear Step Bumper w/Body-Colored Rub Strip/Fascia Accent|Body-Colored Front Bumper w/Body-Colored Rub Strip/Fascia Accent and 2 Tow Hooks|Black Side Windows Trim and Black Front Windshield Trim|Body-Colored Bodyside Cladding and Body-Colored Fender Flares|Chrome door handles|Body-Colored Power Heated Side Mirrors w/Driver Auto Dimming': 2}"
city,3000133,23104.127042857144,53347.21956367153,& belt. Recently changed the oil,private seller,"{'Houston': 43777, 'San Antonio': 24947, 'Columbus': 18333, 'Miami': 18198, 'Jacksonville': 16837, 'Las Vegas': 16396, 'Tampa': 15353, 'Phoenix': 14791, 'Dallas': 14641, 'Orlando': 14499}"
city_fuel_economy,2508829,22.706573085279945,16.079865342342412,WE ARE AWAY'S LOOKING FOR CLEAN CARS AND TRUCKS **COMING FROM OUT OF TOWN? We are 30 Miles from Ontario CA International Airport **OUT OF STATE ? NEED YOUR VEHICLE TRANSPORTED TO YOU? No problem visit our website for a free quote. Price excludes government fees and taxes,V8,"{None: 491678, '19.0': 198958, '18.0': 191823, '22.0': 174756, '21.0': 170910, '20.0': 169255, '17.0': 165191, '16.0': 164005, '26.0': 153793, '15.0': 143499}"
combine_fuel_economy,86,6034.0,6862.30777508558,'Backup Camera',V6,"{None: 3000421, 'False': 6, ' Heated Wiper Park and Defroster|Deep Tinted Glass|Speed Sensitive Variable Intermittent Wipers|Galvanized Steel/Aluminum Panels|Lip Spoiler|Metal-Look Grille w/Chrome Surround|Liftgate Rear Cargo Access|Roof Rack Rails Only|LED Brakelights|Front Fog Lamps|Perimeter/Approach Lights|Radio w/Seek-Scan': 4, ' Clock': 4, ' Power Recline': 2, 'V6': 2, ' Height Adjustment and Fore/Aft Movement|4-Way Passenger Seat -inc: Manual Recline and Fore/Aft Movement|60-40 Folding Split-Bench Front Facing Fold Forward Seatback Rear Seat|Manual tilt steering column|Manual Rear Windows and Removable 3rd Row Windows|Illuminated Front Cupholder|Rear Cupholder|2 12V DC Power Outlets|Compass|Cruise Control w/Steering Wheel Controls|Manual Air Conditioning|Locking glove box|Interior Trim -inc: Metal-Look Instrument Panel Insert': 2, '42.2 in': 2, '42.6 in': 2, ' 155 amps (Included and only available with (LGX) 3.6L V6 engine without (V92) Trailering Package.)|Traction Select|Suspension': 2}"


In [40]:
def get_value_cnt(col, n=10):

    cnt = dict(parDF.select(col).groupBy(col).count().orderBy('count', ascending=False).take(n))
    return cnt


descr_trans['top_n_values'] = [get_value_cnt(col) for col in parDF.columns]

In [45]:
pd.set_option('display.max_colwidth', None)
descr_trans[['count', 'top_n_values']].sort_values(by='count', ascending=False)

summary,count,top_n_values
vin,3000507,"{' Sales Department open until 7 PM on Mondays. Doing business with Fairmont Ford will put a smile on your face. Established in 1909': 15, '5FNYF6H94MB000906': 2, '2HKRW2H52LH679102': 2, '2FMPK4K97LBB07668': 2, '1N6AD0EV6KN713209': 2, '1FADP3F29FL349680': 2, '5N1AZ2CSXLN157394': 2, '5XYPG4A31KG501728': 2, '1N4BL4DV0LC262865': 2, '5FNYF6H10MB018426': 2}"
city,3000133,"{'Houston': 43777, 'San Antonio': 24947, 'Columbus': 18333, 'Miami': 18198, 'Jacksonville': 16837, 'Las Vegas': 16396, 'Tampa': 15353, 'Phoenix': 14791, 'Dallas': 14641, 'Orlando': 14499}"
daysonmarket,3000126,"{'8': 73158, '7': 72604, '6': 69285, '13': 58356, '14': 58340, '5': 57396, '15': 56730, '12': 53006, '0': 52296, '20': 50989}"
dealer_zip,3000126,"{'77477': 7320, '33619': 5619, '77034': 5257, '77074': 5217, '91401': 5113, '77090': 4942, '85382': 4801, '85297': 4739, '85260': 4717, '30096': 4621}"
exterior_color,2996558,"{'Black': 105464, 'White': 95246, 'Silver': 59396, 'Gray': 58950, 'None': 40261, 'Summit White': 40244, 'Blue': 39021, 'Red': 31466, 'I4': 31330, '4-Wheel Disc Brakes': 30845}"
body_type,2986589,"{'SUV / Crossover': 1416402, 'Sedan': 742036, 'Pickup Truck': 474595, 'Hatchback': 88374, 'Minivan': 79802, 'Coupe': 71607, 'Van': 47166, 'Wagon': 40505, 'Convertible': 26010, None: 13918}"
franchise_dealer,2985724,"{'True': 1327898, 'False': 511280, 'Electronic Stability Control': 26082, 'Air Conditioning': 23666, 'I4': 21412, '4-Wheel Disc Brakes': 19916, None: 14783, 'Front Bucket Seats': 13819, 'V6': 13184, '6 Speakers': 12722}"
model_name,2975734,"{'F-150': 64858, 'Escape': 38920, 'Camry': 33630, 'Rogue': 31678, 'Equinox': 30903, 'Silverado 1500': 30495, 'CR-V': 30245, 'Accord': 29354, 'Civic': 27664, 'Explorer': 27629}"
interior_color,2973804,"{'Black': 498405, 'None': 315687, 'Gray': 153803, 'Black (Ebony)': 75200, 'Jet Black': 69520, 'Black (Charcoal)': 65757, 'Brown (Beige)': 31947, None: 26703, 'Brown (Tan)': 25656, 'False': 24594}"
make_name,2972421,"{'Ford': 286435, 'Chevrolet': 188171, 'Toyota': 169544, 'Honda': 158464, 'Nissan': 135681, 'Hyundai': 103593, 'Jeep': 77151, 'Kia': 75628, 'GMC': 50137, 'Volkswagen': 46596}"


In [39]:
# parDF.select('bed').groupBy('bed').count().orderBy('count', ascending=False).select('bed').take(20)

###### Columns below won't be investigated:

vin,
longitude,
savings_amount,
listing_id,
description,
latitude,
trimId,
main_picture_url,
frame_damaged,
theft_title,
salvage,
is_certified,
vehicle_damage_category,
bed_height,
bed_length,
cabin,
bed,
combine_fuel_economy,
