In [1]:
#
# example of binary data not directly supported in Pandas
# but easily read by another module
#
import scipy.io
mat = scipy.io.loadmat('datasets/matlab.mat')
mat

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Feb  2 14:21:02 2021',
 '__version__': '1.0',
 '__globals__': [],
 'storage': array([[0.00000000e+00],
        [3.60020368e-04],
        [7.26299303e-04],
        ...,
        [1.36616373e-05],
        [1.35810556e-05],
        [1.36134929e-05]]),
 'T1': array([[475.5],
        [475.5],
        [475.4],
        ...,
        [476.8],
        [476.8],
        [476.8]]),
 'time': array([[10256548.8],
        [10256549. ],
        [10256549.2],
        ...,
        [10273672.4],
        [10273672.6],
        [10273672.8]]),
 'value': array([[10256548.8       ],
        [10256550.09106825],
        [10256550.31226313],
        ...,
        [10273670.63541315],
        [10273672.1572869 ],
        [10273672.87393071]])}

In [2]:
#
# excel data
#
import pandas as pd
#
# simple file 
#
dog_food_orders = \
    pd.read_excel('datasets/dog_food_orders.xlsx', engine = 'openpyxl')
dog_food_orders

Unnamed: 0,product,wholesale_price,msrp,qty_ordered,qty_shipped
0,skippys_dream,8.99,18.38,100,100
1,just_the_beef,4.99,10.43,200,195
2,potatos_and_lamb,5.19,11.43,50,50
3,turkey_and_cranberries,5.98,12.0,50,50
4,roasted_duck,9.59,17.48,15,15


In [4]:
#
# read html from website
# Page name: Wind power
# Author: Wikipedia contributors
# Publisher: Wikipedia, The Free Encyclopedia.
# Date of last revision: 9 May 2021 22:11 UTC
# Date retrieved: 12 May 2021 23:32 UTC
# Permanent link: https://en.wikipedia.org/w/index.php?title=Wind_power&oldid=1022329353
# Primary contributors: Revision history statistics
# Page Version ID: 1022329353
# Date retrieved: 7 February 2021 20:17 UTC
#
import pandas as pd
#
data_url = 'https://en.wikipedia.org/wiki/Wind_power'
data = pd.read_html(data_url)
data

[                                                   0
 0                                Part of a series on
 1                                 Sustainable energy
 2  Energy conservation Cogeneration Efficient ene...
 3  Renewable energy Hydroelectricity Solar Wind G...
 4  Sustainable transport Electric vehicle Green v...
 5                  Renewable energy portal  Category
 6  .mw-parser-output .navbar{display:inline;font-...,
                           Wind farm  Capacity(MW)         Country      Refs
 0                   Gansu Wind Farm          7965           China  [23][24]
 1               Muppandal wind farm          1500           India      [25]
 2           Alta (Oak Creek-Mojave)          1320   United States      [26]
 3               Jaisalmer Wind Park          1064           India      [27]
 4          Shepherds Flat Wind Farm           845   United States      [28]
 5                  Roscoe Wind Farm           782   United States       NaN
 6   Horse Hollow Wind Energ

In [5]:
data[1]

Unnamed: 0,Wind farm,Capacity(MW),Country,Refs
0,Gansu Wind Farm,7965,China,[23][24]
1,Muppandal wind farm,1500,India,[25]
2,Alta (Oak Creek-Mojave),1320,United States,[26]
3,Jaisalmer Wind Park,1064,India,[27]
4,Shepherds Flat Wind Farm,845,United States,[28]
5,Roscoe Wind Farm,782,United States,
6,Horse Hollow Wind Energy Center,736,United States,[29][30]
7,Capricorn Ridge Wind Farm,662,United States,[29][30]
8,Fântânele-Cogealac Wind Farm,600,Romania,[31]
9,Fowler Ridge Wind Farm,600,United States,[32]


In [6]:
#
# normal csv file
#
import pandas as pd
#
pd.read_csv('datasets/bike_share.csv')

Unnamed: 0,dteday,hr,cnt
0,1/1/2011,0,16
1,1/1/2011,1,40
2,1/1/2011,2,32
3,1/1/2011,3,13
4,1/1/2011,4,1
...,...,...,...
17374,12/31/2012,19,119
17375,12/31/2012,20,89
17376,12/31/2012,21,90
17377,12/31/2012,22,61


In [7]:
#
# csv stored in a different character encoding
#
import pandas as pd
#
pd.read_csv('datasets/bike_share_UCS_2_LE_BOM.csv')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte

In [8]:
#
# specify encoding to read file
#
import pandas as pd
#
pd.read_csv('datasets/bike_share_UCS_2_LE_BOM.csv', 
            encoding = 'utf_16_le')

Unnamed: 0,dteday,hr,cnt
0,1/1/2011,0,16
1,1/1/2011,1,40
2,1/1/2011,2,32
3,1/1/2011,3,13
4,1/1/2011,4,1
...,...,...,...
17374,12/31/2012,19,119
17375,12/31/2012,20,89
17376,12/31/2012,21,90
17377,12/31/2012,22,61


In [9]:
#
# specify encoding and a separator
#
pd.read_csv('datasets/bike_share_UCS_2_LE_BOM.tsv', 
            encoding = 'utf_16_le',
            sep = '\t')

Unnamed: 0,dteday,hr,cnt
0,1/1/2011,0,16
1,1/1/2011,1,40
2,1/1/2011,2,32
3,1/1/2011,3,13
4,1/1/2011,4,1
...,...,...,...
17374,12/31/2012,19,119
17375,12/31/2012,20,89
17376,12/31/2012,21,90
17377,12/31/2012,22,61


In [10]:
import pandas as pd
import sqlite3
#
# connect to bike_share.db -- will create if does not exist
#
conn = sqlite3.connect('datasets/bike_share.db')
c = conn.cursor()
#
# get data into a Pandas DataFrame
#
data = pd.read_csv('datasets/bike_share_UCS_2_LE_BOM.tsv', 
                   encoding = 'utf_16_le',
                   sep = '\t')
#
# this is how we execute SQL language commands
# here, we create a table with three columns
#
c.execute('CREATE TABLE IF NOT EXISTS RENTALS (Date, Hour, Qty)')
conn.commit()
#
# now that we have the table, we can put our data into it
#
data.to_sql("RENTALS", conn, if_exists = 'replace')

In [11]:
#
# json data
#
import requests
#
US_counties_query = \
    requests.get('https://api.census.gov/data/2010/dec/sf1?get=NAME&for=county:*')
US_counties_query.text

'[["NAME","state","county"],\n["Sebastian County, Arkansas","05","131"],\n["Sevier County, Arkansas","05","133"],\n["Sharp County, Arkansas","05","135"],\n["Stone County, Arkansas","05","137"],\n["Union County, Arkansas","05","139"],\n["Van Buren County, Arkansas","05","141"],\n["Washington County, Arkansas","05","143"],\n["White County, Arkansas","05","145"],\n["Yell County, Arkansas","05","149"],\n["Colusa County, California","06","011"],\n["Butte County, California","06","007"],\n["Alameda County, California","06","001"],\n["Alpine County, California","06","003"],\n["Amador County, California","06","005"],\n["Calaveras County, California","06","009"],\n["Contra Costa County, California","06","013"],\n["Del Norte County, California","06","015"],\n["Kings County, California","06","031"],\n["Glenn County, California","06","021"],\n["Humboldt County, California","06","023"],\n["Imperial County, California","06","025"],\n["El Dorado County, California","06","017"],\n["Fresno County, Cali

In [12]:
#
# decode the string into a list, then break down the list
#
import ast
US_counties_data = ast.literal_eval(US_counties_query.text)
[US_counties_data[i] for i in range(len(US_counties_data))]

[['NAME', 'state', 'county'],
 ['Sebastian County, Arkansas', '05', '131'],
 ['Sevier County, Arkansas', '05', '133'],
 ['Sharp County, Arkansas', '05', '135'],
 ['Stone County, Arkansas', '05', '137'],
 ['Union County, Arkansas', '05', '139'],
 ['Van Buren County, Arkansas', '05', '141'],
 ['Washington County, Arkansas', '05', '143'],
 ['White County, Arkansas', '05', '145'],
 ['Yell County, Arkansas', '05', '149'],
 ['Colusa County, California', '06', '011'],
 ['Butte County, California', '06', '007'],
 ['Alameda County, California', '06', '001'],
 ['Alpine County, California', '06', '003'],
 ['Amador County, California', '06', '005'],
 ['Calaveras County, California', '06', '009'],
 ['Contra Costa County, California', '06', '013'],
 ['Del Norte County, California', '06', '015'],
 ['Kings County, California', '06', '031'],
 ['Glenn County, California', '06', '021'],
 ['Humboldt County, California', '06', '023'],
 ['Imperial County, California', '06', '025'],
 ['El Dorado County, Cali

In [13]:
#
# convert json to Pandas
#
import pandas as pd
#
US_counties_data = pd.read_json(US_counties_query.text)
US_counties_data

Unnamed: 0,0,1,2
0,NAME,state,county
1,"Sebastian County, Arkansas",05,131
2,"Sevier County, Arkansas",05,133
3,"Sharp County, Arkansas",05,135
4,"Stone County, Arkansas",05,137
...,...,...,...
3217,"Eau Claire County, Wisconsin",55,035
3218,"Florence County, Wisconsin",55,037
3219,"Fond du Lac County, Wisconsin",55,039
3220,"Forest County, Wisconsin",55,041


In [14]:
#
# we can get the same result using only .read_json()
#
URL = \
'https://api.census.gov/data/2010/dec/sf1?get=NAME&for=county:*'
US_counties_data = pd.read_json(URL).loc[1:, :]
US_counties_data.columns = ['County', 
                            'state_code', 
                            'county_code']
US_counties_data

Unnamed: 0,County,state_code,county_code
1,"Sebastian County, Arkansas",05,131
2,"Sevier County, Arkansas",05,133
3,"Sharp County, Arkansas",05,135
4,"Stone County, Arkansas",05,137
5,"Union County, Arkansas",05,139
...,...,...,...
3217,"Eau Claire County, Wisconsin",55,035
3218,"Florence County, Wisconsin",55,037
3219,"Fond du Lac County, Wisconsin",55,039
3220,"Forest County, Wisconsin",55,041


In [15]:
#
# read html from website
# Page name: Wind power
# Author: Wikipedia contributors
# Publisher: Wikipedia, The Free Encyclopedia.
# Date of last revision: 9 May 2021 22:11 UTC
# Date retrieved: 12 May 2021 23:32 UTC
# Permanent link: https://en.wikipedia.org/w/index.php?title=Wind_power&oldid=1022329353
# Primary contributors: Revision history statistics
# Page Version ID: 1022329353
# Date retrieved: 7 February 2021 20:17 UTC
#
import pandas as pd
#
data_url = 'https://en.wikipedia.org/wiki/Wind_power'
data = pd.read_html(data_url)
len(data)

27

In [16]:
data[2]

Unnamed: 0,Wind farm,Capacity (MW),Country,Turbines and model,Commissioned,Refs
0,Walney Extension,659,United Kingdom,47 x Vestas 8MW 40 x Siemens Gamesa 7MW,2018,[48]
1,London Array,630,United Kingdom,175 × Siemens SWT-3.6,2012,[49][50][51]
2,Gemini Wind Farm,600,The Netherlands,150 × Siemens SWT-4.0,2017,[52]
3,Gwynt y Môr,576,United Kingdom,160 × Siemens SWT-3.6 107,2015,[53]
4,Greater Gabbard,504,United Kingdom,140 × Siemens SWT-3.6,2012,[54]
5,Anholt,400,Denmark,111 × Siemens SWT-3.6–120,2013,[55]
6,BARD Offshore 1,400,Germany,80 BARD 5.0 turbines,2013,[56]


In [17]:
#
# access XML data
#
import pandas as pd
import pandas_read_xml as pdx
pdx.read_xml('https://data.cityofnewyork.us/api/views/825b-niea/rows.xml?accessType=DOWNLOAD',
             ['response', 'row', 'row'],
             root_is_rows = False)

Unnamed: 0,@_id,@_uuid,@_position,@_address,grade,year,category,number_tested,mean_scale_score,level_1_1,level_1_2,level_2_1,level_2_2,level_3_1,level_3_2,level_4_1,level_4_2,level_3_4_1,level_3_4_2
0,row-yvru.xsvq_qzbq,00000000-0000-0000-1B32-87B29F69422E,0,https://data.cityofnewyork.us/resource/_825b-n...,3,2006,Asian,9768,700,243,2.5,543,5.6,4128,42.3,4854,49.7,8982,92.0
1,row-q8z8.q7b3.3ppa,00000000-0000-0000-D9CE-B1F89A0D1307,0,https://data.cityofnewyork.us/resource/_825b-n...,4,2006,Asian,9973,699,294,2.9,600,6.0,4245,42.6,4834,48.5,9079,91.0
2,row-i23x-4prc-46fj,00000000-0000-0000-C9EE-2418870B5F93,0,https://data.cityofnewyork.us/resource/_825b-n...,5,2006,Asian,9852,691,369,3.7,907,9.2,4379,44.4,4197,42.6,8576,87.0
3,row-7u9v-dwwy.fhw3,00000000-0000-0000-17FD-7D50A499A0E1,0,https://data.cityofnewyork.us/resource/_825b-n...,6,2006,Asian,9606,682,452,4.7,1176,12.2,4646,48.4,3332,34.7,7978,83.1
4,row-64kf_k4ma_4zgq,00000000-0000-0000-6A3C-917EFD40527E,0,https://data.cityofnewyork.us/resource/_825b-n...,7,2006,Asian,9433,671,521,5.5,1698,18.0,4690,49.7,2524,26.8,7214,76.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,row-i6yz_wbge_khnu,00000000-0000-0000-11E2-D5CA802D0782,0,https://data.cityofnewyork.us/resource/_825b-n...,5,2011,White,10808,699,311,2.9,1709,15.8,4532,41.9,4256,39.4,8788,81.3
164,row-mm88~c8n4.tyfx,00000000-0000-0000-D92B-C091D3670481,0,https://data.cityofnewyork.us/resource/_825b-n...,6,2011,White,9875,695,409,4.1,1818,18.4,3435,34.8,4213,42.7,7648,77.4
165,row-sxyt.p8pk~ziwq,00000000-0000-0000-1695-CBB42E513864,0,https://data.cityofnewyork.us/resource/_825b-n...,7,2011,White,9679,690,423,4.4,1739,18.0,3023,31.2,4494,46.4,7517,77.7
166,row-kd3n_t3yf_bxec,00000000-0000-0000-9CFC-43E494DD5002,0,https://data.cityofnewyork.us/resource/_825b-n...,8,2011,White,9570,688,433,4.5,2190,22.9,4142,43.3,2805,29.3,6947,72.6


In [18]:
#
# a file with contents that aren't part of the data
#
import pandas as pd
#
sensor_data = pd.read_excel('datasets/sensor_data.xlsx',
                            usecols = [2, 3, 4, 5],
                            header = 3,
                            sheet_name = '20210117_0037',
                            engine = 'openpyxl')
sensor_data

Unnamed: 0,time,s1,s2,s3
0,0.95924,0.234046,3.514755,0.447823
1,0.96424,0.171669,4.837437,0.495071
2,0.96924,0.271542,4.673110,0.383604
3,0.97424,0.057020,3.048180,0.193946
4,0.97924,0.062937,5.631988,0.338150
...,...,...,...,...
10669,54.30424,15.066911,7.506722,29.028388
10670,54.30924,17.264761,10.195260,24.272862
10671,54.31424,9.744161,7.956116,10.244286
10672,54.31924,1.722525,10.254374,2.513277


In [19]:
#
# store the cleaned sensor data to a new file in Excel format
#
sensor_data.to_excel('datasets/sensor_data_clean.xlsx', 
                     sheet_name = 'sensor_data',
                     index = None,
                     engine = 'openpyxl')

In [20]:
#
# read sas data
#
# sample from http://www.principlesofeconometrics.com/sas.htm
#
import pandas as pd
#
data = pd.read_sas('datasets/airline.sas7bdat')
data.head()

Unnamed: 0,YEAR,Y,W,R,L,K
0,1948.0,1.214,0.243,0.1454,1.415,0.612
1,1949.0,1.354,0.26,0.2181,1.384,0.559
2,1950.0,1.569,0.278,0.3157,1.388,0.573
3,1951.0,1.948,0.297,0.394,1.55,0.564
4,1952.0,2.265,0.31,0.3559,1.802,0.574


In [1]:
#
# read spss data
#
# example from http://calcnet.mth.cmich.edu/org/spss/prj_body_fat_data.htm
# requires optional dependency 'pyreadstat'; use pip or conda to install pyreadstat
#
import pandas as pd
data = pd.read_spss('datasets/bodyfat.sav')
data.head()

Unnamed: 0,y,x1,x2,x3
0,19.5,43.1,29.1,11.9
1,24.7,49.8,28.2,22.8
2,30.7,51.9,37.0,18.7
3,29.8,54.3,31.1,20.1
4,19.1,42.2,30.9,12.9


In [2]:
#
# read stata data
#
# example from https://www.federalreserve.gov/econres/scfindex.htm
#
import pandas as pd
#
data = pd.read_stata('datasets/rscfp2019.dta')
print('data:\n', data.head(2))
data.to_stata('datasets/rscfp2019_write.dta', write_index = False)
data2 = pd.read_stata('datasets/rscfp2019_write.dta')
print('data2:\n', data2.head(2))
print('differences between rscfp2019 and rscfp2019_write:\n', 
      data.compare(data2))

data:
    yy1  y1          wgt  hhsex  age  agecl  educ  edcl  married  kids  ...  \
0    1  11  6119.779308      2   75      6    12     4        2     0  ...   
1    1  12  4712.374912      2   75      6    12     4        2     0  ...   

   nwcat  inccat  assetcat  ninccat  ninc2cat  nwpctlecat  incpctlecat  \
0      5       3         6        3         2          10            6   
1      5       3         6        3         1          10            5   

   nincpctlecat  incqrtcat  nincqrtcat  
0             6          3           3  
1             5          2           2  

[2 rows x 351 columns]
data2:
    yy1  y1          wgt  hhsex  age  agecl  educ  edcl  married  kids  ...  \
0    1  11  6119.779308      2   75      6    12     4        2     0  ...   
1    1  12  4712.374912      2   75      6    12     4        2     0  ...   

   nwcat  inccat  assetcat  ninccat  ninc2cat  nwpctlecat  incpctlecat  \
0      5       3         6        3         2          10            6 

In [1]:
#
# work with hdf5 data
#
import pandas as pd
import numpy as np
#
time = np.arange(0, 100, 0.01)
values = np.sin(2 * np.pi * time / 17)
data = pd.DataFrame({'time': time, 'data': values})
#
data.to_hdf('store_data_h5.h5', 'table', append = True)
data_reread = pd.read_hdf('store_data_h5.h5', 'table', where = ['index > 9'])
data_reread.head()

Unnamed: 0,time,data
10,0.1,0.036951
11,0.11,0.040645
12,0.12,0.044337
13,0.13,0.048029
14,0.14,0.051721


In [1]:
import pandas as pd
import sqlite3
tables = \
    pd.read_sql(
    "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name ASC", 
    sqlite3.connect('datasets/pet_stores.db'))
tables

Unnamed: 0,name
0,Customers
1,Invoices


In [3]:
#
# use Pandas to make a copy of the database
#
import pandas as pd
import sqlite3
stores = pd.read_sql("SELECT * FROM Customers",
                     sqlite3.connect('datasets/pet_stores.db'))
invoices = pd.read_sql("SELECT * FROM Invoices",
                     sqlite3.connect('datasets/pet_stores.db'))
#
stores.to_sql("Customers", 
              sqlite3.connect('datasets/pet_stores_2.db'), 
              if_exists = 'replace',
              index = True)
invoices.to_sql("Invoices",
                sqlite3.connect('datasets/pet_stores_2.db'), 
                if_exists = 'replace',
                index = True)

In [4]:
#
# read all the customers
#
customers = \
    pd.read_sql(
    'select Customer_Number, Company, City, State from Customers', 
    sqlite3.connect('datasets/pet_stores_2.db'))
customers

Unnamed: 0,Customer_Number,Company,City,State
0,15846,Pet Radio,Minneapolis,MN
1,13197,Just Pets,Columbus,OH
2,11154,Love Strays,Pittsburgh,PA
3,15540,WebPet,Mesa,AZ
4,18397,Pet-ng-Zoo,San Antonio,TX
5,17293,Pet Fud,St. Paul,MN
6,19977,Canine Cravings,Henderson,NV
7,15238,Stock Ur Pet,Stockton,CA
8,15217,Kittle Lullaby,New Orleans,LA
9,17114,Big Dogs Only,Anchorage,AK


In [5]:
#
# read the customers from TX
#
TX_customers = \
    pd.read_sql(
    "select Customer_Number, Company, City, State from Customers " + 
    "WHERE State = 'TX'", 
    sqlite3.connect('datasets/pet_stores_2.db'))
TX_customers

Unnamed: 0,Customer_Number,Company,City,State
0,18397,Pet-ng-Zoo,San Antonio,TX
1,18448,K9s4Ever,Dallas,TX
2,11485,GrrrtoPurr,Plano,TX


In [6]:
customers.loc[customers['State'] == 'TX', :]

Unnamed: 0,Customer_Number,Company,City,State
4,18397,Pet-ng-Zoo,San Antonio,TX
10,18448,K9s4Ever,Dallas,TX
12,11485,GrrrtoPurr,Plano,TX


In [7]:
#
# get the invoices table
#
invoices = pd.read_sql("select * from Invoices", 
                       sqlite3.connect('datasets/pet_stores_2.db'))
print(invoices.head(3), '\n', invoices.tail(3))

   level_0  index       Date  Customer_Number        Invoice   Amount
0        0      0  2/20/2020            18397  2020022018397  1038.95
1        1      1  2/25/2020            17114  2020022517114  1523.97
2        2      2  2/25/2020            15846  2020022515846  1535.56 
     level_0  index       Date  Customer_Number        Invoice   Amount
35       35     35  3/19/2020            17114  2020031917114  1041.22
36       36     36  3/19/2020            13388  2020031913388  1043.63
37       37     37  3/24/2020            15217  2020032415217  1542.85


In [8]:
#
# get new invoices from csv file
#
new_invoices = pd.read_csv('datasets/new_invoices.csv')
new_invoices

Unnamed: 0,Date,Customer_Number,Invoice,Amount
0,3/24/2020,15846,2020032415846,1355.73
1,3/24/2020,17293,2020032417293,1375.67
2,3/24/2020,18448,2020032418448,1415.38
3,3/24/2020,11485,2020032411485,1025.46
4,3/25/2020,11154,2020032511154,1245.01
5,3/25/2020,13388,2020032513388,1055.32
6,3/25/2020,13197,2020032513197,1105.15
7,3/25/2020,15217,2020032515217,1495.33
8,3/26/2020,17114,2020032617114,1185.3
9,3/26/2020,13197,2020032613197,1290.44


In [9]:
#
# update the DataFrame index so it extends the index
#
new_invoices.index = list(range(invoices['index'].max() + 1, 
                                invoices['index'].max() + 
                                new_invoices.shape[0] + 1))
new_invoices

Unnamed: 0,Date,Customer_Number,Invoice,Amount
38,3/24/2020,15846,2020032415846,1355.73
39,3/24/2020,17293,2020032417293,1375.67
40,3/24/2020,18448,2020032418448,1415.38
41,3/24/2020,11485,2020032411485,1025.46
42,3/25/2020,11154,2020032511154,1245.01
43,3/25/2020,13388,2020032513388,1055.32
44,3/25/2020,13197,2020032513197,1105.15
45,3/25/2020,15217,2020032515217,1495.33
46,3/26/2020,17114,2020032617114,1185.3
47,3/26/2020,13197,2020032613197,1290.44


In [10]:
#
# add the invoices to the table
#
new_invoices.to_sql("Invoices", 
                     sqlite3.connect('datasets/pet_stores_2.db'), 
                     if_exists = 'append',
                     index = True)

In [11]:
invoices = pd.read_sql("select * from Invoices", 
                       sqlite3.connect('datasets/pet_stores_2.db'))
print(invoices.head(), '\n', invoices.tail())

   level_0  index       Date  Customer_Number        Invoice   Amount
0      0.0      0  2/20/2020            18397  2020022018397  1038.95
1      1.0      1  2/25/2020            17114  2020022517114  1523.97
2      2.0      2  2/25/2020            15846  2020022515846  1535.56
3      3.0      3  2/25/2020            15540  2020022515540  1568.95
4      4.0      4  2/26/2020            18448  2020022618448  1509.51 
     level_0  index       Date  Customer_Number        Invoice   Amount
45      NaN     45  3/25/2020            15217  2020032515217  1495.33
46      NaN     46  3/26/2020            17114  2020032617114  1185.30
47      NaN     47  3/26/2020            13197  2020032613197  1290.44
48      NaN     48  3/26/2020            15238  2020032615238  1170.75
49      NaN     49  3/26/2020            18397  2020032618397  1330.36
