# Spring Cleaning!

Harold's stock data is a mess! Help him clean up his data before the auditors arrive!

In [1]:
# Import Libraries
import pandas as pd
from pathlib import Path

### Load CSV data into Pandas using `read_csv`

In [2]:
csv_path = Path("../../Resources/stock_data.csv")
csv_data = pd.read_csv(csv_path)

### Identify the number of rows and columns (shape) in the DataFrame.

In [16]:
csv_data.shape

(478, 14)

### Generate a sample of the data to visually ensure data has been loaded in correctly.

In [17]:
csv_data.sample(10)

Unnamed: 0,symbol,name,sector,price,price_per_earnings,dividend_yield,earnings_per_share,52_week_low,52_week_high,market_cap,ebitda,price_per_sales,price_per_book,sec_filings
247,ICE,Intercontinental Exchange,Financials,67.0,22.95,5.429864,2.37,76.1378,56.8,41373050000.0,3103000000.0,9.619987,2.62,http://www.sec.gov/cgi-bin/browse-edgar?action...
40,AMGN,Amgen Inc,Health Care,173.12,13.76,2.975151,2.57,201.23,152.16,128133300000.0,11945000000.0,5.58192,3.91,http://www.sec.gov/cgi-bin/browse-edgar?action...
154,DPS,Dr Pepper Snapple Group,Consumer Staples,116.93,26.57,1.966102,4.54,126.65,83.23,21209780000.0,1507000000.0,4.214919,9.99,http://www.sec.gov/cgi-bin/browse-edgar?action...
210,GIS,General Mills,Consumer Staples,53.99,17.53,3.586459,2.77,63.73,49.65,31098240000.0,3107600000.0,2.038918,7.26,http://www.sec.gov/cgi-bin/browse-edgar?action...
276,LH,Laboratory Corp. of America Holding,Health Care,165.46,17.79,0.0,7.02,181.715,130.292,17271390000.0,1861200000.0,2.245962,2.71,http://www.sec.gov/cgi-bin/browse-edgar?action...
200,FTV,Fortive Corp,Industrials,69.14,25.05,0.390571,2.46,76.68,54.8844,24916500000.0,1508300000.0,5.168267,7.17,http://www.sec.gov/cgi-bin/browse-edgar?action...
219,HBI,Hanesbrands Inc,Consumer Discretionary,19.57,10.04,2.73224,1.41,25.73,18.9,8006269000.0,926153000.0,1.633977,6.17,http://www.sec.gov/cgi-bin/browse-edgar?action...
269,KMI,Kinder Morgan,Energy,16.8,25.07,2.891845,0.01,22.75,16.56,38612710000.0,5981000000.0,2.823936,1.07,http://www.sec.gov/cgi-bin/browse-edgar?action...
305,MTD,Mettler Toledo,Health Care,601.0,35.56,0.0,14.24,697.26,459.34,16420770000.0,666706000.0,8.372307,31.69,http://www.sec.gov/cgi-bin/browse-edgar?action...
10,AET,Aetna Inc,Health Care,178.0,18.11,1.101989,5.75,194.4,119.51,59197020000.0,4139000000.0,0.992355,3.79,http://www.sec.gov/cgi-bin/browse-edgar?action...


### Identify the number of records in the DataFrame, and compare it with the number of rows in the original file.

In [6]:
csv_data.head()

Unnamed: 0,symbol,name,sector,price,price_per_earnings,dividend_yield,earnings_per_share,52_week_low,52_week_high,market_cap,ebitda,price_per_sales,price_per_book,sec_filings
0,MMM,3M Company,Industrials,$222.89,24.31,2.332862,$7.92,259.77,175.49,138721100000.0,9048000000.0,4.390271,11.34,http://www.sec.gov/cgi-bin/browse-edgar?action...
1,AOS,A.O. Smith Corp,Industrials,,,,,,,,,,,
2,ABT,Abbott Laboratories,Health Care,56.27,22.51,1.908982,0.26,64.6,42.28,102121000000.0,5744000000.0,3.74048,3.19,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ABBV,AbbVie Inc.,Health Care,108.48,19.41,2.49956,3.29,125.86,60.05,181386300000.0,10310000000.0,6.291571,26.14,http://www.sec.gov/cgi-bin/browse-edgar?action...
4,ATVI,Activision Blizzard,Information Technology,65.83,,0.431903,1.28,74.945,38.93,52518670000.0,2704000000.0,10.59512,5.16,http://www.sec.gov/cgi-bin/browse-edgar?action...


### Identify nulls records

In [19]:
csv_data.isnull().mean() * 100

symbol                0.0
name                  0.0
sector                0.0
price                 0.0
price_per_earnings    0.0
dividend_yield        0.0
earnings_per_share    0.0
52_week_low           0.0
52_week_high          0.0
market_cap            0.0
ebitda                0.0
price_per_sales       0.0
price_per_book        0.0
sec_filings           0.0
dtype: float64

### Drop Null Records

In [13]:
csv_data.dropna(inplace=True)
csv_data

Unnamed: 0,symbol,name,sector,price,price_per_earnings,dividend_yield,earnings_per_share,52_week_low,52_week_high,market_cap,ebitda,price_per_sales,price_per_book,sec_filings
0,MMM,3M Company,Industrials,$222.89,24.31,2.332862,$7.92,259.77,175.490,1.387211e+11,9.048000e+09,4.390271,11.34,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABT,Abbott Laboratories,Health Care,56.27,22.51,1.908982,0.26,64.60,42.280,1.021210e+11,5.744000e+09,3.740480,3.19,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ABBV,AbbVie Inc.,Health Care,108.48,19.41,2.499560,3.29,125.86,60.050,1.813863e+11,1.031000e+10,6.291571,26.14,http://www.sec.gov/cgi-bin/browse-edgar?action...
5,AYI,Acuity Brands Inc,Industrials,108.48,18.22,0.351185,7.43,225.36,142.000,6.242378e+09,5.878000e+08,1.795347,3.55,http://www.sec.gov/cgi-bin/browse-edgar?action...
6,ADBE,Adobe Systems Inc,Information Technology,185.16,52.31,0.000000,3.39,204.45,114.451,9.455021e+10,2.538040e+09,13.092818,11.06,http://www.sec.gov/cgi-bin/browse-edgar?action...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,XYL,Xylem Inc.,Industrials,70.24,30.94,1.170079,1.83,76.81,46.860,1.291502e+10,7.220000e+08,2.726209,5.31,http://www.sec.gov/cgi-bin/browse-edgar?action...
500,YUM,Yum! Brands Inc,Consumer Discretionary,76.3,27.25,1.797080,4.07,86.93,62.850,2.700330e+10,2.289000e+09,6.313636,212.08,http://www.sec.gov/cgi-bin/browse-edgar?action...
501,ZBH,Zimmer Biomet Holdings,Health Care,115.53,14.32,0.794834,9.01,133.49,108.170,2.445470e+10,2.007400e+09,3.164895,2.39,http://www.sec.gov/cgi-bin/browse-edgar?action...
502,ZION,Zions Bancorp,Financials,50.71,17.73,1.480933,2.6,55.61,38.430,1.067068e+10,0.000000e+00,3.794579,1.42,http://www.sec.gov/cgi-bin/browse-edgar?action...


### Validate nulls have been dropped

In [14]:
csv_data.isnull().sum()

symbol                0
name                  0
sector                0
price                 0
price_per_earnings    0
dividend_yield        0
earnings_per_share    0
52_week_low           0
52_week_high          0
market_cap            0
ebitda                0
price_per_sales       0
price_per_book        0
sec_filings           0
dtype: int64

### Default null `ebitda` values to 0. Then, validate no records are null for ebitda.

NameError: name 'ebitda' is not defined

### Drop Duplicates

### Sample `price` field

### Clean `price` Series by replacing `$`

### Confirm data type of `price`

### Cast `price` Series as float and then validate using `dtype`