In [None]:
import os
import pandas as pd

### Read a single month of sales data

In [None]:
# Load one month's file on its own to preview the layout of the data.
april_csv_path = './Sales_Data/Sales_April_2019.csv'
df = pd.read_csv(april_csv_path)
df.head()

### List all Sales data csv files

In [None]:
# os.listdir already returns a list of entry names, one per item in the folder.
files = os.listdir('./Sales_Data')
for file in files:
    print(file)

### Concatenate all csv files into one Sales data file

In [None]:
# Combine the per-month CSV files in ./Sales_Data into a single CSV file.
sales_dir = './Sales_Data'
combined_name = 'all_data_csv.csv'

# Sort for a reproducible row order. Skip non-CSV entries, and skip the
# combined file itself so a re-run does not fold the previous output back in.
files = sorted(
    file for file in os.listdir(sales_dir)
    if file.endswith('.csv') and file != combined_name
)
if not files:
    raise FileNotFoundError(f'No monthly CSV files found in {sales_dir}')

# Read every file first and concatenate once; concatenating inside the loop
# re-copies the accumulated frame on every iteration.
monthly_frames = [pd.read_csv(os.path.join(sales_dir, file)) for file in files]
all_data_csv = pd.concat(monthly_frames)

# Write next to the monthly files, which is where the following cell reads from.
# to_csv returns None when given a path, so its result is not assigned back.
all_data_csv.to_csv(os.path.join(sales_dir, combined_name), index=False)

### Read the combined CSV file into a DataFrame

In [None]:
# Load the combined sales file and preview the first rows.
combined_csv_path = './Sales_Data/all_data_csv.csv'
all_sales_data = pd.read_csv(combined_csv_path)
all_sales_data.head()

## Clean up the data!

##### Get all rows that contain at least one NaN value

In [None]:
# Boolean mask: True for every row with a missing value in any column.
has_missing = all_sales_data.isna().any(axis=1)
nan_df = all_sales_data.loc[has_missing]
nan_df.head()

##### Drop rows that are entirely NaN from the dataframe

In [None]:
# Keep a row if at least one of its columns holds a value; rows in which
# every column is missing are removed.
has_any_value = all_sales_data.notna().any(axis=1)
all_sales_data = all_sales_data[has_any_value]
all_sales_data.head()

#### Find rows whose 'Order Date' starts with 'Or' and exclude them from the dataframe

In [None]:
# Flag rows whose 'Order Date' text begins with 'Or' (presumably stray header
# rows embedded in the data - verify against the raw files). Rows with a
# missing 'Order Date' compare as False here and are therefore kept.
starts_with_or = all_sales_data['Order Date'].str[0:2] == 'Or'

# .copy() makes the filtered result an independent frame, so assigning new
# columns to it in later cells does not raise SettingWithCopyWarning.
all_sales_data = all_sales_data[~starts_with_or].copy()
all_sales_data.head()

#### Convert columns to correct data types

In [None]:
# Both columns get the same numeric conversion.
for numeric_column in ('Quantity Ordered', 'Price Each'):
    all_sales_data[numeric_column] = pd.to_numeric(all_sales_data[numeric_column])
all_sales_data.head()

## Add columns to dataframe

### Add month column to dataframe

In [None]:
# The first two characters of 'Order Date' are used as the month number.
month_text = all_sales_data['Order Date'].str.slice(0, 2)
all_sales_data['Month'] = month_text.astype('int32')
all_sales_data.head()

### Add sales column to dataframe

In [None]:
# Sales for each row = quantity ordered x unit price.
quantity = all_sales_data['Quantity Ordered']
unit_price = all_sales_data['Price Each']
all_sales_data['Sales'] = quantity.mul(unit_price)
all_sales_data.head()

## Data Analysis

### 1. What was the best month for sales? How much was earned that month?

In [None]:
# Sum every numeric column within each month; the result is indexed by 'Month'.
sales_by_month = all_sales_data.groupby(by='Month')
results = sales_by_month.sum(numeric_only=True)
results

### 2. Plot the above data

In [None]:
import matplotlib.pyplot as plt

# Take the x positions from the groupby index instead of a fixed range(1, 13).
# The bars then always line up with the months actually present in `results`,
# and a data set that lacks some months no longer fails on a length mismatch.
months = results.index

fig, ax = plt.subplots()
ax.bar(months, results['Sales'])
ax.set_xticks(months)
ax.set_ylabel('Sales in USD($)')
ax.set_xlabel('Months')
fig.suptitle('Sales Per Month')
plt.show()