In [3]:
import os
import pandas as pd

### Read a single month sales data

In [None]:
df=pd.read_csv('./Sales_Data/Sales_April_2019.csv')
df.head()

### List all Sales data csv files

In [None]:
files = [file for file in os.listdir('./Sales_Data')]
for file in files:
    print(file)

### Concatenate all csv files into one Sales data file

In [None]:
files = [file for file in os.listdir('./Sales_Data')]
all_data_csv = pd.DataFrame()
for file in files:
   df=pd.read_csv('./Sales_Data/'+file)
   all_data_csv=pd.concat([all_data_csv,df])

all_data_csv = all_data_csv.to_csv('all_data_csv.csv',index=False)

### Read all data csv file to a data frame

In [4]:
all_sales_data = pd.read_csv('./Sales_Data/all_data_csv.csv')
all_sales_data.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
0,259358,34in Ultrawide Monitor,1,379.99,10/28/19 10:56,"609 Cherry St, Dallas, TX 75001"
1,259359,27in 4K Gaming Monitor,1,389.99,10/28/19 17:26,"225 5th St, Los Angeles, CA 90001"
2,259360,AAA Batteries (4-pack),2,2.99,10/24/19 17:20,"967 12th St, New York City, NY 10001"
3,259361,27in FHD Monitor,1,149.99,10/14/19 22:26,"628 Jefferson St, New York City, NY 10001"
4,259362,Wired Headphones,1,11.99,10/07/19 16:10,"534 14th St, Los Angeles, CA 90001"


## Clean up the data!

##### Get all NaN rows in the dataframe

In [5]:
nan_df = all_sales_data[all_sales_data.isna().any(axis=1)]
nan_df.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
282,,,,,,
509,,,,,,
838,,,,,,
888,,,,,,
1169,,,,,,


##### Drop NaN rows from the dataframe

In [6]:
all_sales_data=all_sales_data.dropna(how='all')
all_sales_data.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
0,259358,34in Ultrawide Monitor,1,379.99,10/28/19 10:56,"609 Cherry St, Dallas, TX 75001"
1,259359,27in 4K Gaming Monitor,1,389.99,10/28/19 17:26,"225 5th St, Los Angeles, CA 90001"
2,259360,AAA Batteries (4-pack),2,2.99,10/24/19 17:20,"967 12th St, New York City, NY 10001"
3,259361,27in FHD Monitor,1,149.99,10/14/19 22:26,"628 Jefferson St, New York City, NY 10001"
4,259362,Wired Headphones,1,11.99,10/07/19 16:10,"534 14th St, Los Angeles, CA 90001"


#### Find 'Or' and exclude it from the dataframe

In [7]:
all_sales_data = all_sales_data[all_sales_data['Order Date'].str[0:2] != 'Or']
all_sales_data.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
0,259358,34in Ultrawide Monitor,1,379.99,10/28/19 10:56,"609 Cherry St, Dallas, TX 75001"
1,259359,27in 4K Gaming Monitor,1,389.99,10/28/19 17:26,"225 5th St, Los Angeles, CA 90001"
2,259360,AAA Batteries (4-pack),2,2.99,10/24/19 17:20,"967 12th St, New York City, NY 10001"
3,259361,27in FHD Monitor,1,149.99,10/14/19 22:26,"628 Jefferson St, New York City, NY 10001"
4,259362,Wired Headphones,1,11.99,10/07/19 16:10,"534 14th St, Los Angeles, CA 90001"


#### Convert columns to correct data types

In [8]:
all_sales_data['Quantity Ordered']=pd.to_numeric(all_sales_data['Quantity Ordered'])
all_sales_data['Price Each']=pd.to_numeric(all_sales_data['Price Each'])
all_sales_data.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
0,259358,34in Ultrawide Monitor,1,379.99,10/28/19 10:56,"609 Cherry St, Dallas, TX 75001"
1,259359,27in 4K Gaming Monitor,1,389.99,10/28/19 17:26,"225 5th St, Los Angeles, CA 90001"
2,259360,AAA Batteries (4-pack),2,2.99,10/24/19 17:20,"967 12th St, New York City, NY 10001"
3,259361,27in FHD Monitor,1,149.99,10/14/19 22:26,"628 Jefferson St, New York City, NY 10001"
4,259362,Wired Headphones,1,11.99,10/07/19 16:10,"534 14th St, Los Angeles, CA 90001"


## Add columns to dataframe

### Add month column to dataframe

In [None]:
all_sales_data['Month']=all_sales_data['Order Date'].str[0:2]
all_sales_data['Month']=all_sales_data['Month'].astype('int32')
all_sales_data.head()

### Add sales column to dataframe

In [9]:
all_sales_data['Sales'] = all_sales_data['Quantity Ordered']*all_sales_data['Price Each']
all_sales_data.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address,Sales
0,259358,34in Ultrawide Monitor,1,379.99,10/28/19 10:56,"609 Cherry St, Dallas, TX 75001",379.99
1,259359,27in 4K Gaming Monitor,1,389.99,10/28/19 17:26,"225 5th St, Los Angeles, CA 90001",389.99
2,259360,AAA Batteries (4-pack),2,2.99,10/24/19 17:20,"967 12th St, New York City, NY 10001",5.98
3,259361,27in FHD Monitor,1,149.99,10/14/19 22:26,"628 Jefferson St, New York City, NY 10001",149.99
4,259362,Wired Headphones,1,11.99,10/07/19 16:10,"534 14th St, Los Angeles, CA 90001",11.99


## Data Analysis

### 1. What was the best month for sales?. How much was earned that month?