# Setup in VS Code
- First, open the Command Palette (Cmd Shift P), run "Python: Select Interpreter", and select the "python3" interpreter that matches the "pip3" used in the steps below
- Install Jupyter if not already installed: "pip3 install Jupyter"
- Next start a new Jupyter notebook (Cmd Shift P): "Jupyter: Create New Blank Notebook"
- Install pandas with "pip3 install pandas" and, to be able to read Excel files, "pip3 install openpyxl"
- Install XlsxWriter with "pip3 install xlsxwriter" to be able to write Excel files with multiple sheets

In [52]:
# Pandas for CSV/Excel manipulation
import pandas as pd

# Read CSV file

In [53]:
# Load the source CSV (path is relative to the notebook's working directory) into a DataFrame
df_csv = pd.read_csv(filepath_or_buffer="testtable_source.csv")
# Leave the frame as the cell's final expression so the notebook renders it
df_csv

Unnamed: 0,TEST_DATETIME,TEST_NUMBER,TEST_TEXT,RECORD_ADDED
0,2021-08-25 08:42:43,428514,Record added to CSV August 25 of 2021,2021-08-25 08:42:43
1,2021-08-25 08:42:43,904954,Record added to CSV August 25 of 2021,2021-08-25 08:42:43
2,2021-08-25 08:42:43,629890,Record added to CSV August 25 of 2021,2021-08-25 08:42:43
3,2021-08-25 08:42:43,326473,Record added to CSV August 25 of 2021,2021-08-25 08:42:43
4,2021-08-26 08:42:43,815098,Record added to CSV August 26 of 2021,2021-08-26 08:42:43
5,2021-08-26 08:42:43,301769,Record added to CSV August 26 of 2021,2021-08-26 08:42:43
6,2021-08-26 08:42:43,879650,Record added to CSV August 26 of 2021,2021-08-26 08:42:43
7,2021-08-26 08:42:43,737747,Record added to CSV August 26 of 2021,2021-08-26 08:42:43
8,2021-08-27 08:42:43,140170,Record added to CSV August 27 of 2021,2021-08-27 08:42:43
9,2021-08-27 08:42:43,246890,Record added to CSV August 27 of 2021,2021-08-27 08:42:43


# Read Excel file

In [54]:
# Load the source workbook into a DataFrame.
# The converters pin three columns to explicit Python types while reading;
# RECORD_ADDED is not listed, so pandas infers its type on its own.
df_excel = pd.read_excel(
    io="testtable_source.xlsx",
    converters=dict(TEST_DATETIME=str, TEST_NUMBER=int, TEST_TEXT=str),
)
# Leave the frame as the cell's final expression so the notebook renders it
df_excel

Unnamed: 0,TEST_DATETIME,TEST_NUMBER,TEST_TEXT,RECORD_ADDED
0,2021-08-25 08:42:43,428514,Record added to Excel August 25 of 2021,2021-08-25 08:42:43
1,2021-08-25 08:42:43,904954,Record added to Excel August 25 of 2021,2021-08-25 08:42:43
2,2021-08-25 08:42:43,629890,Record added to Excel August 25 of 2021,2021-08-25 08:42:43
3,2021-08-25 08:42:43,326473,Record added to Excel August 25 of 2021,2021-08-25 08:42:43
4,2021-08-26 08:42:43,815098,Record added to Excel August 26 of 2021,2021-08-26 08:42:43
5,2021-08-26 08:42:43,301769,Record added to Excel August 26 of 2021,2021-08-26 08:42:43
6,2021-08-26 08:42:43,879650,Record added to Excel August 26 of 2021,2021-08-26 08:42:43
7,2021-08-26 08:42:43,737747,Record added to Excel August 26 of 2021,2021-08-26 08:42:43
8,2021-08-27 08:42:43,140170,Record added to Excel August 27 of 2021,2021-08-27 08:42:43
9,2021-08-27 08:42:43,246890,Record added to Excel August 27 of 2021,2021-08-27 08:42:43


# CSV Columns

In [55]:
# List DataFrame columns.
# Printed explicitly: a notebook only auto-displays the LAST expression of a cell,
# so a bare `df_csv.columns` on this line would be evaluated and silently discarded.
print(df_csv.columns.tolist())
# List field types (convert_dtypes() picks the best nullable dtype for each column);
# this is the cell's last expression, so it is rendered as the cell output
df_csv.convert_dtypes().dtypes

TEST_DATETIME    string
TEST_NUMBER       Int64
TEST_TEXT        string
RECORD_ADDED     string
dtype: object

# Excel Columns

In [56]:
# List DataFrame columns.
# Printed explicitly: a notebook only auto-displays the LAST expression of a cell,
# so a bare `df_excel.columns` on this line would be evaluated and silently discarded.
print(df_excel.columns.tolist())
# List field types (convert_dtypes() picks the best nullable dtype for each column);
# this is the cell's last expression, so it is rendered as the cell output
df_excel.convert_dtypes().dtypes

TEST_DATETIME            string
TEST_NUMBER               Int64
TEST_TEXT                string
RECORD_ADDED     datetime64[ns]
dtype: object

# Write CSV to Excel

In [57]:
# Export the CSV-sourced frame as an Excel workbook; index=False leaves the row index out of the file
df_csv.to_excel(
    excel_writer='testtable_csv_export.xlsx',
    index=False,
)

# Write Excel to CSV

In [58]:
# Export the Excel-sourced frame as a CSV file; index=False leaves the row index out of the file
df_excel.to_csv(
    path_or_buf='testtable_excel_export.csv',
    index=False,
)

# Excel/CSV Oldest and Latest Dates

In [59]:
# Parse the TEST_DATETIME column of both frames into real datetime values.
# NOTE: this overwrites the column in place on df_csv and df_excel, so every
# later cell sees datetimes rather than the original text.
df_csv['TEST_DATETIME'] = pd.to_datetime(df_csv['TEST_DATETIME'])
df_excel['TEST_DATETIME'] = pd.to_datetime(df_excel['TEST_DATETIME'])

# Earliest timestamp in the CSV data and latest timestamp in the Excel data
date_oldest_csv = df_csv['TEST_DATETIME'].min()
date_newest_excel = df_excel['TEST_DATETIME'].max()

# Report both boundaries
print(f"Oldest CSV date is: {date_oldest_csv!s}")
print(f"Newest Excel date is: {date_newest_excel!s}")

Oldest CSV date is: 2021-08-25 08:42:43
Newest Excel date is: 2021-08-27 08:42:43


# Write CSV transform to Excel

In [60]:
# Filter the CSV data: keep only the rows whose TEST_DATETIME equals the oldest CSV date.
# date_query is a boolean Series (one True/False per row of df_csv).
date_query = df_csv['TEST_DATETIME'] == date_oldest_csv
# Select rows with the mask directly; routing an already-computed mask through
# df.query('@date_query') only adds an expression-parsing round trip.
df_filtered_csv = df_csv.loc[date_query]
# Send part of the CSV filtered DataFrame (two columns only) to an Excel file
df_filtered_csv.to_excel('testtable_transformed_csv_export.xlsx', index=False, columns=['TEST_DATETIME', 'TEST_NUMBER'])

# Write Excel transform to CSV

In [61]:
# Filter the Excel data: keep only the rows whose TEST_DATETIME equals the newest Excel date.
# date_query is a boolean Series (one True/False per row of df_excel).
date_query = df_excel['TEST_DATETIME'] == date_newest_excel
# Select rows with the mask directly; routing an already-computed mask through
# df.query('@date_query') only adds an expression-parsing round trip.
df_filtered_excel = df_excel.loc[date_query]
# Send part of the Excel filtered DataFrame (two columns only) to a CSV file
df_filtered_excel.to_csv('testtable_transformed_excel_export.csv', index=False, columns=['TEST_DATETIME', 'TEST_NUMBER'])

# Combine Excel and CSV into a Single Excel/CSV File

In [62]:
# Append two DataFrames: Excel rows first, then CSV rows, renumbered with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
df_excel_combined = pd.concat([df_excel, df_csv], ignore_index=True)
# Send combined DataFrame to Excel and CSV file
df_excel_combined.to_excel('testtable_combined_export.xlsx', index=False)
df_excel_combined.to_csv('testtable_combined_export.csv', index=False)
# View combined DataFrame (kept as the cell's last expression; a bare expression
# in the middle of a cell is evaluated but never displayed)
df_excel_combined

# Write Two Separate Data Sets to Excel in Separate Sheets

In [51]:
# Start an Excel writer so that several DataFrames can be written to one workbook, one sheet each.
# The with-block saves and closes the file on exit, even if a write fails;
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter("testtable_sheets_export.xlsx", engine="xlsxwriter") as Excelwriter:
    # Use the same filtered data that was used for the transforms, this time with all columns
    df_filtered_csv.to_excel(Excelwriter, sheet_name="CSVData", index=False)
    df_filtered_excel.to_excel(Excelwriter, sheet_name="ExcelData", index=False)