# Spring Cleaning!

Harold's stock data is a mess! Help him clean up his data before the auditors arrive!

In [None]:
# Initial imports
import pandas as pd
from pathlib import Path

### Load CSV data into Pandas using `read_csv`

In [None]:
# Location of the raw stock data, relative to the notebook's working directory.
csv_path = Path("../stock_data.csv")
# Parse the CSV into a DataFrame that every later cell cleans in turn.
csv_data = pd.read_csv(filepath_or_buffer=csv_path)

### Identify the number of rows and columns (shape) in the DataFrame.

In [None]:
# `shape` is a (row_count, column_count) tuple for the loaded DataFrame.
csv_data.shape

### Preview the DataFrame using `head` to visually ensure data has been loaded in correctly.

In [None]:
# Show the first rows (pandas defaults to 5) as a quick sanity check of the load.
csv_data.head()

### Identify the number of records in the DataFrame, and compare it with the number of rows in the original file.

In [None]:
# `count` reports the number of non-null values per column, so a column whose
# count is below the row count contains missing data.
csv_data.count()

### Identify null records (percentage of null values per column)

In [None]:
# The mean of the boolean null mask is the fraction of nulls in each column;
# scale it to a percentage.
100 * csv_data.isna().mean()

### Drop Null Records

In [None]:
# Remove every row that has at least one null value (the pandas defaults,
# spelled out), and take a copy so later column assignments act on an
# independent DataFrame.
csv_data = csv_data.dropna(axis=0, how="any").copy()

### Validate nulls have been dropped

In [None]:
# Count the remaining nulls per column.
csv_data.isna().sum()

### Default null `ebitda` values to 0. Then, validate no records are null for `ebitda`.

In [None]:
# Replace any missing `ebitda` value with 0, then count the nulls left in
# that column.
# NOTE(review): the earlier `dropna()` cell removes every row containing a
# null, so this fill is probably a no-op by the time it runs; confirm the
# intended cell order.
ebitda_filled = csv_data["ebitda"].fillna(0)
csv_data["ebitda"] = ebitda_filled
csv_data["ebitda"].isna().sum()

### Drop Duplicates

In [None]:
# Keep the first occurrence of each fully identical row and discard the rest
# (the pandas default, spelled out); copy so the result is an independent
# DataFrame.
csv_data = csv_data.drop_duplicates(keep="first").copy()

---

### Challenge

#### Preview `price` field using the `head` function.

In [None]:
# Inspect the first 10 raw `price` values before cleaning them.
csv_data["price"].head(10)

#### Clean `price` Series by removing the `$` symbol

In [None]:
# Remove the literal dollar sign from every `price` string.
# `regex=False` is passed explicitly: pandas releases before 2.0 defaulted
# `str.replace` to regex=True, where `$` is the end-of-string anchor rather
# than a literal character. Making the match literal keeps the result
# independent of the installed pandas version.
csv_data["price"] = csv_data["price"].str.replace("$", "", regex=False)
# Preview the cleaned values.
csv_data["price"].head(10)

#### Confirm data type of `price`

In [None]:
# Check the dtype of `price` before the numeric cast in the next cell.
csv_data["price"].dtype

#### Cast `price` Series as float and then validate using `dtype`

In [None]:
# Convert the `price` column to floating point, store it back on the
# DataFrame, and display the resulting dtype to confirm the cast.
price_as_float = csv_data["price"].astype('float')
csv_data["price"] = price_as_float
csv_data["price"].dtype