# Orders

## Import

In [2]:
import pandas as pd

# Raw-content GitHub URL of the orders CSV; read straight into a DataFrame.
orders_url = "https://raw.githubusercontent.com/MerleSt/Eniac/main/Data-Eniac/orders.csv"
orders = pd.read_csv(filepath_or_buffer=orders_url)
# Bare last expression -> rich (truncated) display of the loaded frame.
orders

Unnamed: 0,order_id,created_date,total_paid,state
0,241319,2017-01-02 13:35:40,44.99,Cancelled
1,241423,2017-11-06 13:10:02,136.15,Completed
2,242832,2017-12-31 17:40:03,15.76,Completed
3,243330,2017-02-16 10:59:38,84.98,Completed
4,243784,2017-11-24 13:35:19,157.86,Cancelled
...,...,...,...,...
226904,527397,2018-03-14 13:56:38,42.99,Place Order
226905,527398,2018-03-14 13:57:25,42.99,Shopping Basket
226906,527399,2018-03-14 13:57:34,141.58,Shopping Basket
226907,527400,2018-03-14 13:57:41,19.98,Shopping Basket


## Check for Duplicates

In [3]:
# Count rows flagged by DataFrame.duplicated() (default arguments).
# The recorded output is 0, so no rows are dropped in this section.
orders.duplicated().sum()

0

## Missing Values

In [6]:
# Number of missing (NaN) entries in each column.
orders.isna().sum()

order_id        0
created_date    0
total_paid      5
state           0
dtype: int64

In [7]:
# Derive the count from the data instead of hard-coding it, so the message
# stays correct if the source CSV changes.
n_missing = orders.total_paid.isna().sum()
# Share of rows with a missing total_paid, as a percentage rounded to 5 decimals.
pct_missing = ((n_missing / orders.shape[0]) * 100).round(5)
print(f"{n_missing} missing values represents {pct_missing}% of the rows in our DataFrame")

5 missing values represents 0.0022% of the rows in our DataFrame


In [8]:
# Relative frequency of missing (True) vs. present (False) total_paid values.
total_paid_missing = orders["total_paid"].isna()
total_paid_missing.value_counts(normalize=True)

False    0.999978
True     0.000022
Name: total_paid, dtype: float64

In [11]:
# Keep only the rows whose total_paid is present.
# The explicit .copy() makes `orders` an independent DataFrame rather than a
# filtered selection of the loaded one, so later column assignments
# (e.g. orders['col'] = ...) do not raise SettingWithCopyWarning.
orders = orders.loc[orders.total_paid.notna(), :].copy()
# Confirm that no missing values remain in any column.
orders.isna().sum()

order_id        0
created_date    0
total_paid      0
state           0
dtype: int64

## Datatypes & Format

In [15]:
# Inspect the column dtypes before any conversion.
orders.dtypes

order_id          int64
created_date     object
total_paid      float64
state            object
dtype: object

In [20]:
# Cast order_id to string (presumably because it is an identifier, not a
# quantity -- confirm with downstream usage).
# .assign returns a new DataFrame, which avoids SettingWithCopyWarning when
# `orders` is itself the result of an earlier row filter.
orders = orders.assign(order_id=orders['order_id'].astype(str))

In [19]:
# Parse the created_date strings into pandas datetimes.
# .assign returns a new DataFrame, which avoids SettingWithCopyWarning when
# `orders` is itself the result of an earlier row filter.
orders = orders.assign(created_date=pd.to_datetime(orders['created_date']))

In [21]:
# Re-check the dtypes after the order_id and created_date conversions.
orders.dtypes

order_id                object
created_date    datetime64[ns]
total_paid             float64
state                   object
dtype: object

## Export

In [23]:
# Write the cleaned orders to CSV without the index column.
# NOTE(review): this is an absolute path on one machine; the cell will fail
# elsewhere unless that directory exists -- consider a configurable data dir.
# NOTE(review): CSV stores no dtypes, so order_id / created_date will need to
# be re-typed when this file is read back.
cleaned_orders_path = '/Users/merlesteffen/Documents/GitHub/Eniac/Data-Eniac/Data_Cleaned/orders.csv'
orders.to_csv(cleaned_orders_path, index=False)