# Pull a subset from the full dataset (*found in `raw_data_combined_v2.csv`*)<br>for easier testing when parsing `desc` (*description*) values

## <font color=MediumSlateBlue>1) Import Libraries</font>

In [1]:
# Import libraries
import pandas as pd
from pathlib import Path

# ~~~~~~~~~~ VERY IMPORTANT NOTE ~~~~~~~~~~
# Write-guard for the export step: the subset CSV is only (over)written when
# this flag is True. Set it to False to run the notebook without touching the
# file on disk. The read-back step is NOT guarded, so with False it loads
# whatever `subset_for_testing.csv` already exists.
actually_save_csv = True

## <font color=MediumSlateBlue>2) Import Full Dataset for Slicing</font>

In [2]:
# Load the full combined dataset, using the `uid` column as the row index.
# `low_memory=False` makes pandas infer each column's dtype from the whole
# file in one pass instead of chunk by chunk, which avoids the mixed-type
# `DtypeWarning` that chunked inference can raise (the warning residue in this
# cell's recorded output appears to be that case).
full_df = pd.read_csv(
    Path('./Resources/raw_data_combined_v2.csv'),
    index_col='uid',
    low_memory=False,
)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  mask |= (ar1 == a)


## <font color=LightSalmon>3) Review Full Dataset</font>

### <font color=PaleGreen>3a) Check Column Names</font>

In [3]:
# Column labels of the full dataset, returned as a NumPy array
full_df.columns.values

array(['city', 'price', 'year', 'manufacturer', 'make', 'condition',
       'cylinders', 'fuel', 'odometer', 'title_status', 'transmission',
       'VIN', 'drive', 'size', 'type', 'paint_color', 'lat', 'long',
       'posting_date', 'desc', 'region', 'model'], dtype=object)

### <font color=PaleGreen>3b) Check Column Types</font>

In [4]:
# dtype that pandas inferred for each column of the full dataset
full_df.dtypes

city             object
price             int64
year            float64
manufacturer     object
make             object
condition        object
cylinders        object
fuel             object
odometer        float64
title_status     object
transmission     object
VIN              object
drive            object
size             object
type             object
paint_color      object
lat             float64
long            float64
posting_date     object
desc             object
region           object
model            object
dtype: object

## <font color=MediumSlateBlue>4) Slice Subset of Full Dataset</font>

### <font color=PaleGreen>4a) Slice the Full Dataset</font>

In [5]:
# Number of trailing rows of the full dataset to keep in the test subset.
SUBSET_ROWS = 500

# `.tail(n)` selects the last n rows like `.iloc[-n:]`, but returns an empty
# frame for n == 0 (whereas `.iloc[-0:]` would return every row).
# `.copy()` detaches the subset from `full_df`, so any later in-place edit of
# `subset_df` cannot raise `SettingWithCopyWarning` or affect the full frame.
subset_df = full_df.tail(SUBSET_ROWS).copy()

### <font color=PaleGreen>4b) Review the New DataFrame</font>

In [6]:
# Full-frame display left disabled; only the per-column dtypes are shown here
# subset_df
subset_df.dtypes

city             object
price             int64
year            float64
manufacturer     object
make             object
condition        object
cylinders        object
fuel             object
odometer        float64
title_status     object
transmission     object
VIN              object
drive            object
size             object
type             object
paint_color      object
lat             float64
long            float64
posting_date     object
desc             object
region           object
model            object
dtype: object

### ~~<font color=PaleGreen>4c) Drop Extra `uid` Column</font>~~

~~For reasons not understood as of this writing, there is an additional `uid` column present when `raw_data_combined.csv` is first imported: `uid` and `uid.1`~~

~~The desired `uid` column for our indexing purposes is in fact `uid.1` so:~~
~~1. Drop the incorrect `uid` column (`uid`) -- which is *not* the index~~
~~2. Rename the existing index from `uid.1` to  `uid`~~

In [7]:
# NOTE: this cell is intentionally disabled -- every statement is commented
# out, matching the struck-through section heading. Kept for reference only.
# # drop the `uid` column which is not the index
# subset_df.drop(columns='uid', inplace=True)
# # rename the existing index from `uid.1` to simply `uid`
# subset_df.index.rename('uid', inplace=True)

### <font color=PaleGreen>4d) Review the DataFrame</font>

In [8]:
# Display the subset; the rendered output abbreviates rows and columns with `...`
subset_df

Unnamed: 0_level_0,city,price,year,manufacturer,make,condition,cylinders,fuel,odometer,title_status,...,drive,size,type,paint_color,lat,long,posting_date,desc,region,model
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7313710266,,7550,2015.0,dodge,,,4 cylinders,gas,73269.0,clean,...,fwd,,sedan,grey,40.745755,-111.939740,2021-04-27T23:45:57-0600,Engine: 4 Cylinders Transmission: Automatic Ti...,wyoming,dart sxt
7313702408,,13950,2017.0,nissan,,,4 cylinders,gas,28709.0,rebuilt,...,4wd,,SUV,black,40.745787,-111.939869,2021-04-27T22:45:22-0600,Engine: 4 Cylinders Transmission: Automatic Ti...,wyoming,rogue awd
7313679442,,11500,2012.0,chevrolet,,good,8 cylinders,gas,175102.0,rebuilt,...,4wd,full-size,truck,black,41.888456,-107.495083,2021-04-27T20:54:00-0600,Well taken care of truck for sale.,wyoming,silverado
7313673342,,8000,2002.0,ford,,like new,8 cylinders,gas,139500.0,clean,...,4wd,full-size,truck,white,41.439000,-105.801000,2021-04-27T20:31:22-0600,Very clean and straight plow truck. All record...,wyoming,f-150
7313667324,,4950,2012.0,dodge,,,6 cylinders,gas,142761.0,clean,...,fwd,,van,black,40.745820,-111.939998,2021-04-27T20:10:23-0600,Engine: 6 Cylinders Transmission: Automatic Ti...,wyoming,grand caravan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7301591192,,23590,2019.0,nissan,,good,6 cylinders,gas,32226.0,clean,...,fwd,,sedan,,33.786500,-84.445400,2021-04-04T03:21:31-0600,Carvana is the safer way to buy a car During t...,wyoming,maxima s sedan 4d
7301591187,,30590,2020.0,volvo,,good,,gas,12029.0,clean,...,fwd,,sedan,red,33.786500,-84.445400,2021-04-04T03:21:29-0600,Carvana is the safer way to buy a car During t...,wyoming,s60 t5 momentum sedan 4d
7301591147,,34990,2020.0,cadillac,,good,,diesel,4174.0,clean,...,,,hatchback,white,33.779214,-84.411811,2021-04-04T03:21:17-0600,Carvana is the safer way to buy a car During t...,wyoming,xt4 sport suv 4d
7301591140,,28990,2018.0,lexus,,good,6 cylinders,gas,30112.0,clean,...,fwd,,sedan,silver,33.786500,-84.445400,2021-04-04T03:21:11-0600,Carvana is the safer way to buy a car During t...,wyoming,es 350 sedan 4d


## <font color=MediumSlateBlue>5) Save Subset DataFrame to a CSV</font>

### <font color=PaleGreen>5a) Save the Subset DataFrame to a CSV</font>

In [9]:
# Export the subset (index included) only when the write-guard flag is set.
if actually_save_csv:
    subset_csv_path = Path('./Resources/subset_for_testing.csv')
    subset_df.to_csv(subset_csv_path)

### <font color=PaleGreen>5b) Read Back the New CSV File for Confirmation</font>

In [10]:
# Re-load the exported CSV, again using `uid` as the row index.
test_subset_df = pd.read_csv(
    Path('./Resources/subset_for_testing.csv'),
    index_col='uid',
)

# Only compare against the in-memory subset when the file was written during
# this run; otherwise the CSV on disk may come from an earlier session.
if actually_save_csv:
    assert test_subset_df.shape == subset_df.shape, (
        'Re-loaded CSV does not match the shape of `subset_df`'
    )

### <font color=PaleGreen>5c) Review New DataFrame for Confirmation</font>

In [11]:
# Display the frame re-loaded from the CSV for a visual comparison with `subset_df`
test_subset_df

Unnamed: 0_level_0,city,price,year,manufacturer,make,condition,cylinders,fuel,odometer,title_status,...,drive,size,type,paint_color,lat,long,posting_date,desc,region,model
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7313710266,,7550,2015.0,dodge,,,4 cylinders,gas,73269.0,clean,...,fwd,,sedan,grey,40.745755,-111.939740,2021-04-27T23:45:57-0600,Engine: 4 Cylinders Transmission: Automatic Ti...,wyoming,dart sxt
7313702408,,13950,2017.0,nissan,,,4 cylinders,gas,28709.0,rebuilt,...,4wd,,SUV,black,40.745787,-111.939869,2021-04-27T22:45:22-0600,Engine: 4 Cylinders Transmission: Automatic Ti...,wyoming,rogue awd
7313679442,,11500,2012.0,chevrolet,,good,8 cylinders,gas,175102.0,rebuilt,...,4wd,full-size,truck,black,41.888456,-107.495083,2021-04-27T20:54:00-0600,Well taken care of truck for sale.,wyoming,silverado
7313673342,,8000,2002.0,ford,,like new,8 cylinders,gas,139500.0,clean,...,4wd,full-size,truck,white,41.439000,-105.801000,2021-04-27T20:31:22-0600,Very clean and straight plow truck. All record...,wyoming,f-150
7313667324,,4950,2012.0,dodge,,,6 cylinders,gas,142761.0,clean,...,fwd,,van,black,40.745820,-111.939998,2021-04-27T20:10:23-0600,Engine: 6 Cylinders Transmission: Automatic Ti...,wyoming,grand caravan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7301591192,,23590,2019.0,nissan,,good,6 cylinders,gas,32226.0,clean,...,fwd,,sedan,,33.786500,-84.445400,2021-04-04T03:21:31-0600,Carvana is the safer way to buy a car During t...,wyoming,maxima s sedan 4d
7301591187,,30590,2020.0,volvo,,good,,gas,12029.0,clean,...,fwd,,sedan,red,33.786500,-84.445400,2021-04-04T03:21:29-0600,Carvana is the safer way to buy a car During t...,wyoming,s60 t5 momentum sedan 4d
7301591147,,34990,2020.0,cadillac,,good,,diesel,4174.0,clean,...,,,hatchback,white,33.779214,-84.411811,2021-04-04T03:21:17-0600,Carvana is the safer way to buy a car During t...,wyoming,xt4 sport suv 4d
7301591140,,28990,2018.0,lexus,,good,6 cylinders,gas,30112.0,clean,...,fwd,,sedan,silver,33.786500,-84.445400,2021-04-04T03:21:11-0600,Carvana is the safer way to buy a car During t...,wyoming,es 350 sedan 4d
