### Note: this solution runs on Python 3.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
import re

#### Import the data and get a high-level picture

In [2]:
# Load the sales data; the path is relative to the notebook's working directory.
df = pd.read_csv('sales.csv')
# Preview the first rows to see the columns and how the values are formatted.
df.head()

Unnamed: 0,order_id,name,ordered_at,price,quantity,line_total
0,10000,"""ICE CREAM"" Peanut Fudge",2018-01-01 11:30:00,$3.50,3,$10.50
1,10000,"""ICE CREAM"" Peanut Fudge",2018-01-01 11:30:00,$3.50,1,$3.50
2,10001,"""SORBET"" Raspberry",2018-01-01 12:14:54,$2.50,2,$5.00
3,10001,,2018-01-01 12:14:54,$1.50,1,$1.50
4,10001,"""CONE"" Dipped Waffle Cone",2018-01-01 12:14:54,$3.50,1,$3.50


In [3]:
# (number of rows, number of columns) of the freshly loaded frame.
df.shape

(29922, 6)

In [None]:
# Column dtypes as inferred by read_csv, before any conversion.
df.dtypes

#### TODO: Fix column datatypes

Change ordered_at to datetime

Change price and line_total to float

In [None]:
# Parse the order timestamps into a proper datetime column.
ordered_at_parsed = pd.to_datetime(df['ordered_at'])
df = df.assign(ordered_at=ordered_at_parsed)

In [None]:
# Convert the currency strings (e.g. "$3.50") in price and line_total to float.
#
# This is done with vectorised string operations instead of slicing each value
# with x[1:], so that:
#   * missing values stay NaN rather than raising a TypeError,
#   * thousands separators ("$1,000.00") are handled,
#   * re-running the cell is a no-op once the columns are already numeric.
for column in ['price', 'line_total']:
    if not pd.api.types.is_numeric_dtype(df[column]):
        df[column] = (
            df[column]
            .str.replace(r'[$,]', '', regex=True)  # drop "$" and "," only
            .astype(float)
        )

In [None]:
# Re-check the dtypes after the conversions in the cells above.
df.dtypes

#### TODO: Drop rows that are duplicated or contain nulls

In [None]:
# Number of rows that are exact repeats of an earlier row.
is_repeat = df.duplicated()
int(is_repeat.sum())

In [None]:
# Remove fully duplicated rows, keeping the first occurrence of each.
df = df.drop_duplicates(keep='first')

In [None]:
# Count of missing values in each column.
missing_mask = df.isna()
missing_mask.sum()

In [None]:
# Look at a few of the rows that have no item name.
name_missing = df['name'].isna()
df.loc[name_missing].head()

In [None]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

#### Sanity-check value ranges and assumptions

In [None]:
# Count the rows whose line_total disagrees with price * quantity.
#
# The amounts are floats, so an exact `!=` would flag rows that only differ by
# binary rounding (1.10 * 3 evaluates to 3.3000000000000003, not 3.3).
# Compare with a half-cent tolerance instead.
HALF_CENT = 0.005
expected_total = df['price'] * df['quantity']
# Negating "within tolerance" (rather than testing ">= HALF_CENT") keeps rows
# with a NaN amount counted as mismatches, as the original `!=` did.
is_mismatch = ~((expected_total - df['line_total']).abs() < HALF_CENT)
int(is_mismatch.sum())

In [None]:
# Number of rows with a negative line total.
is_negative = df['line_total'].lt(0)
int(is_negative.sum())

#### TODO: 
Set line_total = price * quantity if different.

Then remove rows where line_total < 0.

In [None]:
# Keep only the rows whose line_total agrees with price * quantity.
#
# The amounts are floats, so an exact `==` would silently discard consistent
# rows that only differ by binary rounding (1.10 * 3 evaluates to
# 3.3000000000000003, not 3.3). Compare with a half-cent tolerance instead.
#
# NOTE(review): the heading above says to *recompute* line_total when it
# differs, whereas this cell drops those rows - confirm which is intended.
HALF_CENT = 0.005
expected_total = df['price'] * df['quantity']
df = df[(expected_total - df['line_total']).abs() < HALF_CENT]

In [None]:
# Keep only rows whose line total is zero or positive.
non_negative = df['line_total'].ge(0)
df = df.loc[non_negative]

In [None]:
# Summary statistics of the frame after the cleaning steps above.
df.describe()

#### TODO: Extract the text between the double quotes in name and put it in a category column

In [None]:
# Split names such as '"ICE CREAM" Peanut Fudge' into
#   category -> 'ICE CREAM'    (the upper-case text inside the double quotes)
#   name     -> 'Peanut Fudge' (everything after the closing quote and space)
pattern = r'^"([A-Z ]+)" (.*)'

# str.extract applies the regex to the whole column at once; this replaces the
# per-row re.findall + .apply(pd.Series), which builds one Series per row.
parts = df['name'].str.extract(pattern)
parts.columns = ['category', 'name']

# A name that does not follow the expected format yields NaN in both parts.
# Fail with the offending values instead of an opaque IndexError (this is
# also what a second run of this cell hits, since the prefix is already gone).
unmatched = parts['category'].isna()
if unmatched.any():
    raise ValueError(
        '{} name value(s) do not match the \'"CATEGORY" item\' format, e.g. {}'.format(
            int(unmatched.sum()), df.loc[unmatched, 'name'].head(5).tolist()
        )
    )

# assign() returns a new frame, so this never writes into a view of an earlier
# one; category is appended as a new column and name is replaced in place.
df = df.assign(category=parts['category'], name=parts['name'])

In [None]:
# Preview the frame to check the result of the category / name split.
df.head()

#### Analysis, finally!

In [None]:
# Bar chart of the ten items with the highest summed line_total.
top_items = (
    df.groupby('name')['line_total']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

f, ax = plt.subplots(figsize=(10, 6))
# Draw on the axes created above explicitly instead of relying on pyplot's
# implicit "current axes" state.
top_items.plot(kind='bar', ax=ax)
ax.set_title('Top 10 items by total sales')
ax.set_xlabel('Item')
ax.set_ylabel('Total sales ($)')
# Rotate and right-align the tick labels so long item names do not overlap.
f.autofmt_xdate()
plt.show()